1 /* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+)
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Licensed under the MIT license:
41 Permission is hereby granted, free of charge, to any person obtaining
42 a copy of this software and associated documentation files (the
43 "Software"), to deal in the Software without restriction, including
44 without limitation the rights to use, copy, modify, merge, publish,
45 distribute, sublicense, and/or sell copies of the Software, and to permit
46 persons to whom the Software is furnished to do so, subject to the
49 The above copyright notice and this permission notice shall be included
50 in all copies or substantial portions of the Software.
52 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
53 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
54 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
55 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
56 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
57 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
58 USE OR OTHER DEALINGS IN THE SOFTWARE.
61 #define XML_BUILDING_EXPAT 1
63 #include <expat_config.h>
65 #if ! defined(_GNU_SOURCE)
66 # define _GNU_SOURCE 1 /* syscall prototype */
70 /* force stdlib to define rand_s() */
71 # if ! defined(_CRT_RAND_S)
77 #include <string.h> /* memset(), memcpy() */
79 #include <limits.h> /* UINT_MAX */
80 #include <stdio.h> /* fprintf */
81 #include <stdlib.h> /* getenv, rand_s */
82 #include <stdint.h> /* uintptr_t */
83 #include <math.h> /* isnan */
86 # define getpid GetCurrentProcessId
88 # include <sys/time.h> /* gettimeofday() */
89 # include <sys/types.h> /* getpid() */
90 # include <unistd.h> /* getpid() */
91 # include <fcntl.h> /* O_RDONLY */
96 # include "winconfig.h"
103 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
104 # if defined(HAVE_GETRANDOM)
105 # include <sys/random.h> /* getrandom */
107 # include <unistd.h> /* syscall */
108 # include <sys/syscall.h> /* SYS_getrandom */
110 # if ! defined(GRND_NONBLOCK)
111 # define GRND_NONBLOCK 0x0001
112 # endif /* defined(GRND_NONBLOCK) */
113 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
115 #if defined(HAVE_LIBBSD) \
116 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
117 # include <bsd/stdlib.h>
120 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
121 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
124 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
125 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
126 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
127 && ! defined(XML_POOR_ENTROPY)
128 # error You do not have support for any sources of high quality entropy \
129 enabled. For end user security, that is probably not what you want. \
131 Your options include: \
132 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
133 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
134 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
135 * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
136 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
137 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
138 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
139 * Windows >=Vista (rand_s): _WIN32. \
141 If insist on not using any of these, bypass this error by defining \
142 XML_POOR_ENTROPY; you have been warned. \
144 If you have reasons to patch this detection code away or need changes \
145 to the build system, please open a bug. Thank you!
149 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
150 # define XmlConvert XmlUtf16Convert
151 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
152 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
153 # define XmlEncode XmlUtf16Encode
154 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
155 typedef unsigned short ICHAR
;
157 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
158 # define XmlConvert XmlUtf8Convert
159 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
160 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
161 # define XmlEncode XmlUtf8Encode
162 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
168 # define XmlInitEncodingNS XmlInitEncoding
169 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
170 # undef XmlGetInternalEncodingNS
171 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
172 # define XmlParseXmlDeclNS XmlParseXmlDecl
178 # ifdef XML_UNICODE_WCHAR_T
179 # define XML_T(x) (const wchar_t) x
180 # define XML_L(x) L##x
182 # define XML_T(x) (const unsigned short)x
/* Round n up to the next multiple of sz.  sz must be a power of 2, because
   the result is obtained by masking with ~(sz - 1).  sz is evaluated twice,
   so pass an expression without side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))

/* NULL-aware pointer difference: yields (p - q) when both pointers are
   non-NULL and 0 otherwise, so the subtraction is never carried out on a
   NULL pointer.  Each argument may be evaluated twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
199 #include "internal.h"
203 typedef const XML_Char
*KEY
;
214 const XML_Memory_Handling_Suite
*mem
;
217 static size_t keylen(KEY s
);
219 static void copy_salt_to_sipkey(XML_Parser parser
, struct sipkey
*key
);
/* For probing (after a collision) we need a step size that is relatively
   prime to the hash table size, which is a power of 2.  We use
   double-hashing, since we can calculate a second hash value cheaply by
   taking those bits of the first hash value that were discarded (masked
   out) when the table index was calculated: index = hash & mask, where
   mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   SECOND_HASH yields the discarded bits, shifted down by (power - 1) and
   limited to (mask >> 2).  PROBE_STEP sets the lowest bit of that value to
   make it odd and narrows the result to unsigned char.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
/* Initial sizes and sentinel values used when parser data structures are
   first allocated.  INIT_ATTS_SIZE is the initial element count of the
   parser's attribute array, and INIT_DATA_BUF_SIZE is the length of the
   data buffer counted in XML_Chars (not bytes).
   NOTE(review): the units of the remaining constants are not visible in
   this part of the file -- confirm at their points of use. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
248 typedef struct binding
{
249 struct prefix
*prefix
;
250 struct binding
*nextTagBinding
;
251 struct binding
*prevPrefixBinding
;
252 const struct attribute_id
*attId
;
258 typedef struct prefix
{
259 const XML_Char
*name
;
265 const XML_Char
*localPart
;
266 const XML_Char
*prefix
;
272 /* TAG represents an open element.
273 The name of the element is stored in both the document and API
274 encodings. The memory buffer 'buf' is a separately-allocated
275 memory area which stores the name. During the XML_Parse()/
276 XML_ParseBuffer() when the element is open, the memory for the 'raw'
277 version of the name (in the document encoding) is shared with the
278 document buffer. If the element is open across calls to
279 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
280 contain the 'raw' name as well.
282 A parser re-uses these structures, maintaining a list of allocated
283 TAG objects in a free list.
286 struct tag
*parent
; /* parent of this element */
287 const char *rawName
; /* tagName in the original encoding */
289 TAG_NAME name
; /* tagName in the API encoding */
290 char *buf
; /* buffer for name components */
291 char *bufEnd
; /* end of the buffer */
296 const XML_Char
*name
;
297 const XML_Char
*textPtr
;
298 int textLen
; /* length in XML_Chars */
299 int processed
; /* # of processed bytes - when suspended */
300 const XML_Char
*systemId
;
301 const XML_Char
*base
;
302 const XML_Char
*publicId
;
303 const XML_Char
*notation
;
306 XML_Bool is_internal
; /* true if declared in internal subset outside PE */
310 enum XML_Content_Type type
;
311 enum XML_Content_Quant quant
;
312 const XML_Char
*name
;
319 #define INIT_SCAFFOLD_ELEMENTS 32
321 typedef struct block
{
333 const XML_Memory_Handling_Suite
*mem
;
336 /* The XML_Char before the name is used to determine whether
337 an attribute has been specified. */
338 typedef struct attribute_id
{
341 XML_Bool maybeTokenized
;
346 const ATTRIBUTE_ID
*id
;
348 const XML_Char
*value
;
352 unsigned long version
;
354 const XML_Char
*uriName
;
358 const XML_Char
*name
;
360 const ATTRIBUTE_ID
*idAtt
;
362 int allocDefaultAtts
;
363 DEFAULT_ATTRIBUTE
*defaultAtts
;
367 HASH_TABLE generalEntities
;
368 HASH_TABLE elementTypes
;
369 HASH_TABLE attributeIds
;
372 STRING_POOL entityValuePool
;
373 /* false once a parameter entity reference has been skipped */
374 XML_Bool keepProcessing
;
375 /* true once an internal or external PE reference has been encountered;
376 this includes the reference to an external subset */
377 XML_Bool hasParamEntityRefs
;
380 /* indicates if external PE has been read */
381 XML_Bool paramEntityRead
;
382 HASH_TABLE paramEntities
;
384 PREFIX defaultPrefix
;
385 /* === scaffolding for building content model === */
387 CONTENT_SCAFFOLD
*scaffold
;
388 unsigned contentStringLen
;
395 typedef struct open_internal_entity
{
396 const char *internalEventPtr
;
397 const char *internalEventEndPtr
;
398 struct open_internal_entity
*next
;
401 XML_Bool betweenDecl
; /* WFC: PE Between Declarations */
402 } OPEN_INTERNAL_ENTITY
;
405 XML_ACCOUNT_DIRECT
, /* bytes directly passed to the Expat parser */
406 XML_ACCOUNT_ENTITY_EXPANSION
, /* intermediate bytes produced during entity
408 XML_ACCOUNT_NONE
/* i.e. do not account, was accounted already */
412 typedef unsigned long long XmlBigCount
;
413 typedef struct accounting
{
414 XmlBigCount countBytesDirect
;
415 XmlBigCount countBytesIndirect
;
417 float maximumAmplificationFactor
; // >=1.0
418 unsigned long long activationThresholdBytes
;
421 typedef struct entity_stats
{
422 unsigned int countEverOpened
;
423 unsigned int currentDepth
;
424 unsigned int maximumDepthSeen
;
429 typedef enum XML_Error PTRCALL
Processor(XML_Parser parser
, const char *start
,
430 const char *end
, const char **endPtr
);
432 static Processor prologProcessor
;
433 static Processor prologInitProcessor
;
434 static Processor contentProcessor
;
435 static Processor cdataSectionProcessor
;
437 static Processor ignoreSectionProcessor
;
438 static Processor externalParEntProcessor
;
439 static Processor externalParEntInitProcessor
;
440 static Processor entityValueProcessor
;
441 static Processor entityValueInitProcessor
;
443 static Processor epilogProcessor
;
444 static Processor errorProcessor
;
445 static Processor externalEntityInitProcessor
;
446 static Processor externalEntityInitProcessor2
;
447 static Processor externalEntityInitProcessor3
;
448 static Processor externalEntityContentProcessor
;
449 static Processor internalEntityProcessor
;
451 static enum XML_Error
handleUnknownEncoding(XML_Parser parser
,
452 const XML_Char
*encodingName
);
453 static enum XML_Error
processXmlDecl(XML_Parser parser
, int isGeneralTextEntity
,
454 const char *s
, const char *next
);
455 static enum XML_Error
initializeEncoding(XML_Parser parser
);
456 static enum XML_Error
doProlog(XML_Parser parser
, const ENCODING
*enc
,
457 const char *s
, const char *end
, int tok
,
458 const char *next
, const char **nextPtr
,
459 XML_Bool haveMore
, XML_Bool allowClosingDoctype
,
460 enum XML_Account account
);
461 static enum XML_Error
processInternalEntity(XML_Parser parser
, ENTITY
*entity
,
462 XML_Bool betweenDecl
);
463 static enum XML_Error
doContent(XML_Parser parser
, int startTagLevel
,
464 const ENCODING
*enc
, const char *start
,
465 const char *end
, const char **endPtr
,
466 XML_Bool haveMore
, enum XML_Account account
);
467 static enum XML_Error
doCdataSection(XML_Parser parser
, const ENCODING
*,
468 const char **startPtr
, const char *end
,
469 const char **nextPtr
, XML_Bool haveMore
,
470 enum XML_Account account
);
472 static enum XML_Error
doIgnoreSection(XML_Parser parser
, const ENCODING
*,
473 const char **startPtr
, const char *end
,
474 const char **nextPtr
, XML_Bool haveMore
);
477 static void freeBindings(XML_Parser parser
, BINDING
*bindings
);
478 static enum XML_Error
storeAtts(XML_Parser parser
, const ENCODING
*,
479 const char *s
, TAG_NAME
*tagNamePtr
,
480 BINDING
**bindingsPtr
,
481 enum XML_Account account
);
482 static enum XML_Error
addBinding(XML_Parser parser
, PREFIX
*prefix
,
483 const ATTRIBUTE_ID
*attId
, const XML_Char
*uri
,
484 BINDING
**bindingsPtr
);
485 static int defineAttribute(ELEMENT_TYPE
*type
, ATTRIBUTE_ID
*, XML_Bool isCdata
,
486 XML_Bool isId
, const XML_Char
*dfltValue
,
488 static enum XML_Error
storeAttributeValue(XML_Parser parser
, const ENCODING
*,
489 XML_Bool isCdata
, const char *,
490 const char *, STRING_POOL
*,
491 enum XML_Account account
);
492 static enum XML_Error
appendAttributeValue(XML_Parser parser
, const ENCODING
*,
493 XML_Bool isCdata
, const char *,
494 const char *, STRING_POOL
*,
495 enum XML_Account account
);
496 static ATTRIBUTE_ID
*getAttributeId(XML_Parser parser
, const ENCODING
*enc
,
497 const char *start
, const char *end
);
498 static int setElementTypePrefix(XML_Parser parser
, ELEMENT_TYPE
*);
499 static enum XML_Error
storeEntityValue(XML_Parser parser
, const ENCODING
*enc
,
500 const char *start
, const char *end
,
501 enum XML_Account account
);
502 static int reportProcessingInstruction(XML_Parser parser
, const ENCODING
*enc
,
503 const char *start
, const char *end
);
504 static int reportComment(XML_Parser parser
, const ENCODING
*enc
,
505 const char *start
, const char *end
);
506 static void reportDefault(XML_Parser parser
, const ENCODING
*enc
,
507 const char *start
, const char *end
);
509 static const XML_Char
*getContext(XML_Parser parser
);
510 static XML_Bool
setContext(XML_Parser parser
, const XML_Char
*context
);
512 static void FASTCALL
normalizePublicId(XML_Char
*s
);
514 static DTD
*dtdCreate(const XML_Memory_Handling_Suite
*ms
);
515 /* do not call if m_parentParser != NULL */
516 static void dtdReset(DTD
*p
, const XML_Memory_Handling_Suite
*ms
);
517 static void dtdDestroy(DTD
*p
, XML_Bool isDocEntity
,
518 const XML_Memory_Handling_Suite
*ms
);
519 static int dtdCopy(XML_Parser oldParser
, DTD
*newDtd
, const DTD
*oldDtd
,
520 const XML_Memory_Handling_Suite
*ms
);
521 static int copyEntityTable(XML_Parser oldParser
, HASH_TABLE
*, STRING_POOL
*,
523 static NAMED
*lookup(XML_Parser parser
, HASH_TABLE
*table
, KEY name
,
525 static void FASTCALL
hashTableInit(HASH_TABLE
*,
526 const XML_Memory_Handling_Suite
*ms
);
527 static void FASTCALL
hashTableClear(HASH_TABLE
*);
528 static void FASTCALL
hashTableDestroy(HASH_TABLE
*);
529 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER
*, const HASH_TABLE
*);
530 static NAMED
*FASTCALL
hashTableIterNext(HASH_TABLE_ITER
*);
532 static void FASTCALL
poolInit(STRING_POOL
*,
533 const XML_Memory_Handling_Suite
*ms
);
534 static void FASTCALL
poolClear(STRING_POOL
*);
535 static void FASTCALL
poolDestroy(STRING_POOL
*);
536 static XML_Char
*poolAppend(STRING_POOL
*pool
, const ENCODING
*enc
,
537 const char *ptr
, const char *end
);
538 static XML_Char
*poolStoreString(STRING_POOL
*pool
, const ENCODING
*enc
,
539 const char *ptr
, const char *end
);
540 static XML_Bool FASTCALL
poolGrow(STRING_POOL
*pool
);
541 static const XML_Char
*FASTCALL
poolCopyString(STRING_POOL
*pool
,
543 static const XML_Char
*poolCopyStringN(STRING_POOL
*pool
, const XML_Char
*s
,
545 static const XML_Char
*FASTCALL
poolAppendString(STRING_POOL
*pool
,
548 static int FASTCALL
nextScaffoldPart(XML_Parser parser
);
549 static XML_Content
*build_model(XML_Parser parser
);
550 static ELEMENT_TYPE
*getElementType(XML_Parser parser
, const ENCODING
*enc
,
551 const char *ptr
, const char *end
);
553 static XML_Char
*copyString(const XML_Char
*s
,
554 const XML_Memory_Handling_Suite
*memsuite
);
556 static unsigned long generate_hash_secret_salt(XML_Parser parser
);
557 static XML_Bool
startParsing(XML_Parser parser
);
559 static XML_Parser
parserCreate(const XML_Char
*encodingName
,
560 const XML_Memory_Handling_Suite
*memsuite
,
561 const XML_Char
*nameSep
, DTD
*dtd
);
563 static void parserInit(XML_Parser parser
, const XML_Char
*encodingName
);
566 static float accountingGetCurrentAmplification(XML_Parser rootParser
);
567 static void accountingReportStats(XML_Parser originParser
, const char *epilog
);
568 static void accountingOnAbort(XML_Parser originParser
);
569 static void accountingReportDiff(XML_Parser rootParser
,
570 unsigned int levelsAwayFromRootParser
,
571 const char *before
, const char *after
,
572 ptrdiff_t bytesMore
, int source_line
,
573 enum XML_Account account
);
574 static XML_Bool
accountingDiffTolerated(XML_Parser originParser
, int tok
,
575 const char *before
, const char *after
,
577 enum XML_Account account
);
579 static void entityTrackingReportStats(XML_Parser parser
, ENTITY
*entity
,
580 const char *action
, int sourceLine
);
581 static void entityTrackingOnOpen(XML_Parser parser
, ENTITY
*entity
,
583 static void entityTrackingOnClose(XML_Parser parser
, ENTITY
*entity
,
586 static XML_Parser
getRootParserOf(XML_Parser parser
,
587 unsigned int *outLevelDiff
);
590 static unsigned long getDebugLevel(const char *variableName
,
591 unsigned long defaultDebugLevel
);
/* Accessors for the string currently being built in a STRING_POOL.
   Every macro takes a pointer to the pool; the in-progress string occupies
   [pool->start, pool->ptr).

   poolStart    - first character of the in-progress string
   poolEnd      - one past its last character
   poolLength   - number of characters accumulated so far
   poolChop     - drop the last character (moves ptr back by one)
   poolLastChar - the most recently appended character (lvalue)
   poolDiscard  - abandon the in-progress string (ptr = start)
   poolFinish   - commit it, so the next string begins at ptr (start = ptr)

   The argument is parenthesized in every expansion so that any pointer
   expression (for example &localPool) may be passed, not only a plain
   identifier. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
600 #define poolAppendChar(pool, c) \
601 (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \
603 : ((*((pool)->ptr)++ = c), 1))
605 struct XML_ParserStruct
{
606 /* The first member must be m_userData so that the XML_GetUserData
611 const XML_Memory_Handling_Suite m_mem
;
612 /* first character to be parsed */
613 const char *m_bufferPtr
;
614 /* past last character to be parsed */
616 /* allocated end of m_buffer */
617 const char *m_bufferLim
;
618 XML_Index m_parseEndByteIndex
;
619 const char *m_parseEndPtr
;
621 XML_Char
*m_dataBufEnd
;
622 XML_StartElementHandler m_startElementHandler
;
623 XML_EndElementHandler m_endElementHandler
;
624 XML_CharacterDataHandler m_characterDataHandler
;
625 XML_ProcessingInstructionHandler m_processingInstructionHandler
;
626 XML_CommentHandler m_commentHandler
;
627 XML_StartCdataSectionHandler m_startCdataSectionHandler
;
628 XML_EndCdataSectionHandler m_endCdataSectionHandler
;
629 XML_DefaultHandler m_defaultHandler
;
630 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler
;
631 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler
;
632 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler
;
633 XML_NotationDeclHandler m_notationDeclHandler
;
634 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler
;
635 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler
;
636 XML_NotStandaloneHandler m_notStandaloneHandler
;
637 XML_ExternalEntityRefHandler m_externalEntityRefHandler
;
638 XML_Parser m_externalEntityRefHandlerArg
;
639 XML_SkippedEntityHandler m_skippedEntityHandler
;
640 XML_UnknownEncodingHandler m_unknownEncodingHandler
;
641 XML_ElementDeclHandler m_elementDeclHandler
;
642 XML_AttlistDeclHandler m_attlistDeclHandler
;
643 XML_EntityDeclHandler m_entityDeclHandler
;
644 XML_XmlDeclHandler m_xmlDeclHandler
;
645 const ENCODING
*m_encoding
;
646 INIT_ENCODING m_initEncoding
;
647 const ENCODING
*m_internalEncoding
;
648 const XML_Char
*m_protocolEncodingName
;
650 XML_Bool m_ns_triplets
;
651 void *m_unknownEncodingMem
;
652 void *m_unknownEncodingData
;
653 void *m_unknownEncodingHandlerData
;
654 void(XMLCALL
*m_unknownEncodingRelease
)(void *);
655 PROLOG_STATE m_prologState
;
656 Processor
*m_processor
;
657 enum XML_Error m_errorCode
;
658 const char *m_eventPtr
;
659 const char *m_eventEndPtr
;
660 const char *m_positionPtr
;
661 OPEN_INTERNAL_ENTITY
*m_openInternalEntities
;
662 OPEN_INTERNAL_ENTITY
*m_freeInternalEntities
;
663 XML_Bool m_defaultExpandInternalEntities
;
665 ENTITY
*m_declEntity
;
666 const XML_Char
*m_doctypeName
;
667 const XML_Char
*m_doctypeSysid
;
668 const XML_Char
*m_doctypePubid
;
669 const XML_Char
*m_declAttributeType
;
670 const XML_Char
*m_declNotationName
;
671 const XML_Char
*m_declNotationPublicId
;
672 ELEMENT_TYPE
*m_declElementType
;
673 ATTRIBUTE_ID
*m_declAttributeId
;
674 XML_Bool m_declAttributeIsCdata
;
675 XML_Bool m_declAttributeIsId
;
677 const XML_Char
*m_curBase
;
680 BINDING
*m_inheritedBindings
;
681 BINDING
*m_freeBindingList
;
683 int m_nSpecifiedAtts
;
687 unsigned long m_nsAttsVersion
;
688 unsigned char m_nsAttsPower
;
690 XML_AttrInfo
*m_attInfo
;
693 STRING_POOL m_tempPool
;
694 STRING_POOL m_temp2Pool
;
695 char *m_groupConnector
;
696 unsigned int m_groupSize
;
697 XML_Char m_namespaceSeparator
;
698 XML_Parser m_parentParser
;
699 XML_ParsingStatus m_parsingStatus
;
701 XML_Bool m_isParamEntity
;
702 XML_Bool m_useForeignDTD
;
703 enum XML_ParamEntityParsing m_paramEntityParsing
;
705 unsigned long m_hash_secret_salt
;
707 ACCOUNTING m_accounting
;
708 ENTITY_STATS m_entity_stats
;
/* Allocation helpers that route every request through the memory handling
   suite stored in the parser (parser->m_mem) instead of calling the C
   library directly.  `parser` is a pointer to the parser structure; it is
   parenthesized in the expansion so that any pointer expression may be
   passed, not only a plain identifier.

   MALLOC(parser, s)     - allocate s bytes via m_mem.malloc_fcn
   REALLOC(parser, p, s) - resize block p to s bytes via m_mem.realloc_fcn
   FREE(parser, p)       - release block p via m_mem.free_fcn */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
717 XML_ParserCreate(const XML_Char
*encodingName
) {
718 return XML_ParserCreate_MM(encodingName
, NULL
, NULL
);
722 XML_ParserCreateNS(const XML_Char
*encodingName
, XML_Char nsSep
) {
723 XML_Char tmp
[2] = {nsSep
, 0};
724 return XML_ParserCreate_MM(encodingName
, NULL
, tmp
);
727 // "xml=http://www.w3.org/XML/1998/namespace"
728 static const XML_Char implicitContext
[]
729 = {ASCII_x
, ASCII_m
, ASCII_l
, ASCII_EQUALS
, ASCII_h
,
730 ASCII_t
, ASCII_t
, ASCII_p
, ASCII_COLON
, ASCII_SLASH
,
731 ASCII_SLASH
, ASCII_w
, ASCII_w
, ASCII_w
, ASCII_PERIOD
,
732 ASCII_w
, ASCII_3
, ASCII_PERIOD
, ASCII_o
, ASCII_r
,
733 ASCII_g
, ASCII_SLASH
, ASCII_X
, ASCII_M
, ASCII_L
,
734 ASCII_SLASH
, ASCII_1
, ASCII_9
, ASCII_9
, ASCII_8
,
735 ASCII_SLASH
, ASCII_n
, ASCII_a
, ASCII_m
, ASCII_e
,
736 ASCII_s
, ASCII_p
, ASCII_a
, ASCII_c
, ASCII_e
,
739 /* To avoid warnings about unused functions: */
740 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
742 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
744 /* Obtain entropy on Linux 3.17+ */
746 writeRandomBytes_getrandom_nonblock(void *target
, size_t count
) {
747 int success
= 0; /* full count bytes written? */
748 size_t bytesWrittenTotal
= 0;
749 const unsigned int getrandomFlags
= GRND_NONBLOCK
;
752 void *const currentTarget
= (void *)((char *)target
+ bytesWrittenTotal
);
753 const size_t bytesToWrite
= count
- bytesWrittenTotal
;
755 const int bytesWrittenMore
=
756 # if defined(HAVE_GETRANDOM)
757 getrandom(currentTarget
, bytesToWrite
, getrandomFlags
);
759 syscall(SYS_getrandom
, currentTarget
, bytesToWrite
, getrandomFlags
);
762 if (bytesWrittenMore
> 0) {
763 bytesWrittenTotal
+= bytesWrittenMore
;
764 if (bytesWrittenTotal
>= count
)
767 } while (! success
&& (errno
== EINTR
));
772 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
774 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
776 /* Extract entropy from /dev/urandom */
778 writeRandomBytes_dev_urandom(void *target
, size_t count
) {
779 int success
= 0; /* full count bytes written? */
780 size_t bytesWrittenTotal
= 0;
782 const int fd
= open("/dev/urandom", O_RDONLY
);
788 void *const currentTarget
= (void *)((char *)target
+ bytesWrittenTotal
);
789 const size_t bytesToWrite
= count
- bytesWrittenTotal
;
791 const ssize_t bytesWrittenMore
= read(fd
, currentTarget
, bytesToWrite
);
793 if (bytesWrittenMore
> 0) {
794 bytesWrittenTotal
+= bytesWrittenMore
;
795 if (bytesWrittenTotal
>= count
)
798 } while (! success
&& (errno
== EINTR
));
804 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
806 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
808 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
811 writeRandomBytes_arc4random(void *target
, size_t count
) {
812 size_t bytesWrittenTotal
= 0;
814 while (bytesWrittenTotal
< count
) {
815 const uint32_t random32
= arc4random();
818 for (; (i
< sizeof(random32
)) && (bytesWrittenTotal
< count
);
819 i
++, bytesWrittenTotal
++) {
820 const uint8_t random8
= (uint8_t)(random32
>> (i
* 8));
821 ((uint8_t *)target
)[bytesWrittenTotal
] = random8
;
826 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
830 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
831 as it didn't declare it in its header prior to version 5.3.0 of its
832 runtime package (mingwrt, containing stdlib.h). The upstream fix
833 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
834 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
835 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
836 __declspec(dllimport
) int rand_s(unsigned int *);
839 /* Obtain entropy on Windows using the rand_s() function which
840 * generates cryptographically secure random numbers. Internally it
841 * uses RtlGenRandom API which is present in Windows XP and later.
844 writeRandomBytes_rand_s(void *target
, size_t count
) {
845 size_t bytesWrittenTotal
= 0;
847 while (bytesWrittenTotal
< count
) {
848 unsigned int random32
= 0;
851 if (rand_s(&random32
))
852 return 0; /* failure */
854 for (; (i
< sizeof(random32
)) && (bytesWrittenTotal
< count
);
855 i
++, bytesWrittenTotal
++) {
856 const uint8_t random8
= (uint8_t)(random32
>> (i
* 8));
857 ((uint8_t *)target
)[bytesWrittenTotal
] = random8
;
860 return 1; /* success */
865 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
868 gather_time_entropy(void) {
871 GetSystemTimeAsFileTime(&ft
); /* never fails */
872 return ft
.dwHighDateTime
^ ft
.dwLowDateTime
;
875 int gettimeofday_res
;
877 gettimeofday_res
= gettimeofday(&tv
, NULL
);
880 (void)gettimeofday_res
;
882 assert(gettimeofday_res
== 0);
883 # endif /* defined(NDEBUG) */
885 /* Microseconds time is <20 bits entropy */
890 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
893 ENTROPY_DEBUG(const char *label
, unsigned long entropy
) {
894 if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
895 fprintf(stderr
, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label
,
896 (int)sizeof(entropy
) * 2, entropy
, (unsigned long)sizeof(entropy
));
902 generate_hash_secret_salt(XML_Parser parser
) {
903 unsigned long entropy
;
906 /* "Failproof" high quality providers: */
907 #if defined(HAVE_ARC4RANDOM_BUF)
908 arc4random_buf(&entropy
, sizeof(entropy
));
909 return ENTROPY_DEBUG("arc4random_buf", entropy
);
910 #elif defined(HAVE_ARC4RANDOM)
911 writeRandomBytes_arc4random((void *)&entropy
, sizeof(entropy
));
912 return ENTROPY_DEBUG("arc4random", entropy
);
914 /* Try high quality providers first .. */
916 if (writeRandomBytes_rand_s((void *)&entropy
, sizeof(entropy
))) {
917 return ENTROPY_DEBUG("rand_s", entropy
);
919 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
920 if (writeRandomBytes_getrandom_nonblock((void *)&entropy
, sizeof(entropy
))) {
921 return ENTROPY_DEBUG("getrandom", entropy
);
924 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
925 if (writeRandomBytes_dev_urandom((void *)&entropy
, sizeof(entropy
))) {
926 return ENTROPY_DEBUG("/dev/urandom", entropy
);
928 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
929 /* .. and self-made low quality for backup: */
931 /* Process ID is 0 bits entropy if attacker has local access */
932 entropy
= gather_time_entropy() ^ getpid();
934 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
935 if (sizeof(unsigned long) == 4) {
936 return ENTROPY_DEBUG("fallback(4)", entropy
* 2147483647);
938 return ENTROPY_DEBUG("fallback(8)",
939 entropy
* (unsigned long)2305843009213693951ULL);
945 get_hash_secret_salt(XML_Parser parser
) {
946 if (parser
->m_parentParser
!= NULL
)
947 return get_hash_secret_salt(parser
->m_parentParser
);
948 return parser
->m_hash_secret_salt
;
951 static XML_Bool
/* only valid for root parser */
952 startParsing(XML_Parser parser
) {
953 /* hash functions must be initialized before setContext() is called */
954 if (parser
->m_hash_secret_salt
== 0)
955 parser
->m_hash_secret_salt
= generate_hash_secret_salt(parser
);
957 /* implicit context only set for root parser, since child
958 parsers (i.e. external entity parsers) will inherit it
960 return setContext(parser
, implicitContext
);
966 XML_ParserCreate_MM(const XML_Char
*encodingName
,
967 const XML_Memory_Handling_Suite
*memsuite
,
968 const XML_Char
*nameSep
) {
969 return parserCreate(encodingName
, memsuite
, nameSep
, NULL
);
973 parserCreate(const XML_Char
*encodingName
,
974 const XML_Memory_Handling_Suite
*memsuite
, const XML_Char
*nameSep
,
979 XML_Memory_Handling_Suite
*mtemp
;
980 parser
= memsuite
->malloc_fcn(sizeof(struct XML_ParserStruct
));
981 if (parser
!= NULL
) {
982 mtemp
= (XML_Memory_Handling_Suite
*)&(parser
->m_mem
);
983 mtemp
->malloc_fcn
= memsuite
->malloc_fcn
;
984 mtemp
->realloc_fcn
= memsuite
->realloc_fcn
;
985 mtemp
->free_fcn
= memsuite
->free_fcn
;
988 XML_Memory_Handling_Suite
*mtemp
;
989 parser
= (XML_Parser
)malloc(sizeof(struct XML_ParserStruct
));
990 if (parser
!= NULL
) {
991 mtemp
= (XML_Memory_Handling_Suite
*)&(parser
->m_mem
);
992 mtemp
->malloc_fcn
= malloc
;
993 mtemp
->realloc_fcn
= realloc
;
994 mtemp
->free_fcn
= free
;
1001 parser
->m_buffer
= NULL
;
1002 parser
->m_bufferLim
= NULL
;
1004 parser
->m_attsSize
= INIT_ATTS_SIZE
;
1006 = (ATTRIBUTE
*)MALLOC(parser
, parser
->m_attsSize
* sizeof(ATTRIBUTE
));
1007 if (parser
->m_atts
== NULL
) {
1008 FREE(parser
, parser
);
1011 #ifdef XML_ATTR_INFO
1012 parser
->m_attInfo
= (XML_AttrInfo
*)MALLOC(
1013 parser
, parser
->m_attsSize
* sizeof(XML_AttrInfo
));
1014 if (parser
->m_attInfo
== NULL
) {
1015 FREE(parser
, parser
->m_atts
);
1016 FREE(parser
, parser
);
1021 = (XML_Char
*)MALLOC(parser
, INIT_DATA_BUF_SIZE
* sizeof(XML_Char
));
1022 if (parser
->m_dataBuf
== NULL
) {
1023 FREE(parser
, parser
->m_atts
);
1024 #ifdef XML_ATTR_INFO
1025 FREE(parser
, parser
->m_attInfo
);
1027 FREE(parser
, parser
);
1030 parser
->m_dataBufEnd
= parser
->m_dataBuf
+ INIT_DATA_BUF_SIZE
;
1033 parser
->m_dtd
= dtd
;
1035 parser
->m_dtd
= dtdCreate(&parser
->m_mem
);
1036 if (parser
->m_dtd
== NULL
) {
1037 FREE(parser
, parser
->m_dataBuf
);
1038 FREE(parser
, parser
->m_atts
);
1039 #ifdef XML_ATTR_INFO
1040 FREE(parser
, parser
->m_attInfo
);
1042 FREE(parser
, parser
);
1047 parser
->m_freeBindingList
= NULL
;
1048 parser
->m_freeTagList
= NULL
;
1049 parser
->m_freeInternalEntities
= NULL
;
1051 parser
->m_groupSize
= 0;
1052 parser
->m_groupConnector
= NULL
;
1054 parser
->m_unknownEncodingHandler
= NULL
;
1055 parser
->m_unknownEncodingHandlerData
= NULL
;
1057 parser
->m_namespaceSeparator
= ASCII_EXCL
;
1058 parser
->m_ns
= XML_FALSE
;
1059 parser
->m_ns_triplets
= XML_FALSE
;
1061 parser
->m_nsAtts
= NULL
;
1062 parser
->m_nsAttsVersion
= 0;
1063 parser
->m_nsAttsPower
= 0;
1065 parser
->m_protocolEncodingName
= NULL
;
1067 poolInit(&parser
->m_tempPool
, &(parser
->m_mem
));
1068 poolInit(&parser
->m_temp2Pool
, &(parser
->m_mem
));
1069 parserInit(parser
, encodingName
);
1071 if (encodingName
&& ! parser
->m_protocolEncodingName
) {
1073 // We need to stop the upcoming call to XML_ParserFree from happily
1074 // destroying parser->m_dtd because the DTD is shared with the parent
1075 // parser and the only guard that keeps XML_ParserFree from destroying
1076 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1077 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1078 parser
->m_dtd
= NULL
;
1080 XML_ParserFree(parser
);
1085 parser
->m_ns
= XML_TRUE
;
1086 parser
->m_internalEncoding
= XmlGetInternalEncodingNS();
1087 parser
->m_namespaceSeparator
= *nameSep
;
1089 parser
->m_internalEncoding
= XmlGetInternalEncoding();
1096 parserInit(XML_Parser parser
, const XML_Char
*encodingName
) {
1097 parser
->m_processor
= prologInitProcessor
;
1098 XmlPrologStateInit(&parser
->m_prologState
);
1099 if (encodingName
!= NULL
) {
1100 parser
->m_protocolEncodingName
= copyString(encodingName
, &(parser
->m_mem
));
1102 parser
->m_curBase
= NULL
;
1103 XmlInitEncoding(&parser
->m_initEncoding
, &parser
->m_encoding
, 0);
1104 parser
->m_userData
= NULL
;
1105 parser
->m_handlerArg
= NULL
;
1106 parser
->m_startElementHandler
= NULL
;
1107 parser
->m_endElementHandler
= NULL
;
1108 parser
->m_characterDataHandler
= NULL
;
1109 parser
->m_processingInstructionHandler
= NULL
;
1110 parser
->m_commentHandler
= NULL
;
1111 parser
->m_startCdataSectionHandler
= NULL
;
1112 parser
->m_endCdataSectionHandler
= NULL
;
1113 parser
->m_defaultHandler
= NULL
;
1114 parser
->m_startDoctypeDeclHandler
= NULL
;
1115 parser
->m_endDoctypeDeclHandler
= NULL
;
1116 parser
->m_unparsedEntityDeclHandler
= NULL
;
1117 parser
->m_notationDeclHandler
= NULL
;
1118 parser
->m_startNamespaceDeclHandler
= NULL
;
1119 parser
->m_endNamespaceDeclHandler
= NULL
;
1120 parser
->m_notStandaloneHandler
= NULL
;
1121 parser
->m_externalEntityRefHandler
= NULL
;
1122 parser
->m_externalEntityRefHandlerArg
= parser
;
1123 parser
->m_skippedEntityHandler
= NULL
;
1124 parser
->m_elementDeclHandler
= NULL
;
1125 parser
->m_attlistDeclHandler
= NULL
;
1126 parser
->m_entityDeclHandler
= NULL
;
1127 parser
->m_xmlDeclHandler
= NULL
;
1128 parser
->m_bufferPtr
= parser
->m_buffer
;
1129 parser
->m_bufferEnd
= parser
->m_buffer
;
1130 parser
->m_parseEndByteIndex
= 0;
1131 parser
->m_parseEndPtr
= NULL
;
1132 parser
->m_declElementType
= NULL
;
1133 parser
->m_declAttributeId
= NULL
;
1134 parser
->m_declEntity
= NULL
;
1135 parser
->m_doctypeName
= NULL
;
1136 parser
->m_doctypeSysid
= NULL
;
1137 parser
->m_doctypePubid
= NULL
;
1138 parser
->m_declAttributeType
= NULL
;
1139 parser
->m_declNotationName
= NULL
;
1140 parser
->m_declNotationPublicId
= NULL
;
1141 parser
->m_declAttributeIsCdata
= XML_FALSE
;
1142 parser
->m_declAttributeIsId
= XML_FALSE
;
1143 memset(&parser
->m_position
, 0, sizeof(POSITION
));
1144 parser
->m_errorCode
= XML_ERROR_NONE
;
1145 parser
->m_eventPtr
= NULL
;
1146 parser
->m_eventEndPtr
= NULL
;
1147 parser
->m_positionPtr
= NULL
;
1148 parser
->m_openInternalEntities
= NULL
;
1149 parser
->m_defaultExpandInternalEntities
= XML_TRUE
;
1150 parser
->m_tagLevel
= 0;
1151 parser
->m_tagStack
= NULL
;
1152 parser
->m_inheritedBindings
= NULL
;
1153 parser
->m_nSpecifiedAtts
= 0;
1154 parser
->m_unknownEncodingMem
= NULL
;
1155 parser
->m_unknownEncodingRelease
= NULL
;
1156 parser
->m_unknownEncodingData
= NULL
;
1157 parser
->m_parentParser
= NULL
;
1158 parser
->m_parsingStatus
.parsing
= XML_INITIALIZED
;
1160 parser
->m_isParamEntity
= XML_FALSE
;
1161 parser
->m_useForeignDTD
= XML_FALSE
;
1162 parser
->m_paramEntityParsing
= XML_PARAM_ENTITY_PARSING_NEVER
;
1164 parser
->m_hash_secret_salt
= 0;
1167 memset(&parser
->m_accounting
, 0, sizeof(ACCOUNTING
));
1168 parser
->m_accounting
.debugLevel
= getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1169 parser
->m_accounting
.maximumAmplificationFactor
1170 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT
;
1171 parser
->m_accounting
.activationThresholdBytes
1172 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
;
1174 memset(&parser
->m_entity_stats
, 0, sizeof(ENTITY_STATS
));
1175 parser
->m_entity_stats
.debugLevel
= getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1179 /* moves list of bindings to m_freeBindingList */
1180 static void FASTCALL
1181 moveToFreeBindingList(XML_Parser parser
, BINDING
*bindings
) {
1183 BINDING
*b
= bindings
;
1184 bindings
= bindings
->nextTagBinding
;
1185 b
->nextTagBinding
= parser
->m_freeBindingList
;
1186 parser
->m_freeBindingList
= b
;
1191 XML_ParserReset(XML_Parser parser
, const XML_Char
*encodingName
) {
1193 OPEN_INTERNAL_ENTITY
*openEntityList
;
1198 if (parser
->m_parentParser
)
1200 /* move m_tagStack to m_freeTagList */
1201 tStk
= parser
->m_tagStack
;
1204 tStk
= tStk
->parent
;
1205 tag
->parent
= parser
->m_freeTagList
;
1206 moveToFreeBindingList(parser
, tag
->bindings
);
1207 tag
->bindings
= NULL
;
1208 parser
->m_freeTagList
= tag
;
1210 /* move m_openInternalEntities to m_freeInternalEntities */
1211 openEntityList
= parser
->m_openInternalEntities
;
1212 while (openEntityList
) {
1213 OPEN_INTERNAL_ENTITY
*openEntity
= openEntityList
;
1214 openEntityList
= openEntity
->next
;
1215 openEntity
->next
= parser
->m_freeInternalEntities
;
1216 parser
->m_freeInternalEntities
= openEntity
;
1218 moveToFreeBindingList(parser
, parser
->m_inheritedBindings
);
1219 FREE(parser
, parser
->m_unknownEncodingMem
);
1220 if (parser
->m_unknownEncodingRelease
)
1221 parser
->m_unknownEncodingRelease(parser
->m_unknownEncodingData
);
1222 poolClear(&parser
->m_tempPool
);
1223 poolClear(&parser
->m_temp2Pool
);
1224 FREE(parser
, (void *)parser
->m_protocolEncodingName
);
1225 parser
->m_protocolEncodingName
= NULL
;
1226 parserInit(parser
, encodingName
);
1227 dtdReset(parser
->m_dtd
, &parser
->m_mem
);
1231 enum XML_Status XMLCALL
1232 XML_SetEncoding(XML_Parser parser
, const XML_Char
*encodingName
) {
1234 return XML_STATUS_ERROR
;
1235 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1236 XXX There's no way for the caller to determine which of the
1237 XXX possible error cases caused the XML_STATUS_ERROR return.
1239 if (parser
->m_parsingStatus
.parsing
== XML_PARSING
1240 || parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
)
1241 return XML_STATUS_ERROR
;
1243 /* Get rid of any previous encoding name */
1244 FREE(parser
, (void *)parser
->m_protocolEncodingName
);
1246 if (encodingName
== NULL
)
1247 /* No new encoding name */
1248 parser
->m_protocolEncodingName
= NULL
;
1250 /* Copy the new encoding name into allocated memory */
1251 parser
->m_protocolEncodingName
= copyString(encodingName
, &(parser
->m_mem
));
1252 if (! parser
->m_protocolEncodingName
)
1253 return XML_STATUS_ERROR
;
1255 return XML_STATUS_OK
;
1259 XML_ExternalEntityParserCreate(XML_Parser oldParser
, const XML_Char
*context
,
1260 const XML_Char
*encodingName
) {
1261 XML_Parser parser
= oldParser
;
1264 XML_StartElementHandler oldStartElementHandler
;
1265 XML_EndElementHandler oldEndElementHandler
;
1266 XML_CharacterDataHandler oldCharacterDataHandler
;
1267 XML_ProcessingInstructionHandler oldProcessingInstructionHandler
;
1268 XML_CommentHandler oldCommentHandler
;
1269 XML_StartCdataSectionHandler oldStartCdataSectionHandler
;
1270 XML_EndCdataSectionHandler oldEndCdataSectionHandler
;
1271 XML_DefaultHandler oldDefaultHandler
;
1272 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler
;
1273 XML_NotationDeclHandler oldNotationDeclHandler
;
1274 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler
;
1275 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler
;
1276 XML_NotStandaloneHandler oldNotStandaloneHandler
;
1277 XML_ExternalEntityRefHandler oldExternalEntityRefHandler
;
1278 XML_SkippedEntityHandler oldSkippedEntityHandler
;
1279 XML_UnknownEncodingHandler oldUnknownEncodingHandler
;
1280 XML_ElementDeclHandler oldElementDeclHandler
;
1281 XML_AttlistDeclHandler oldAttlistDeclHandler
;
1282 XML_EntityDeclHandler oldEntityDeclHandler
;
1283 XML_XmlDeclHandler oldXmlDeclHandler
;
1284 ELEMENT_TYPE
*oldDeclElementType
;
1287 void *oldHandlerArg
;
1288 XML_Bool oldDefaultExpandInternalEntities
;
1289 XML_Parser oldExternalEntityRefHandlerArg
;
1291 enum XML_ParamEntityParsing oldParamEntityParsing
;
1292 int oldInEntityValue
;
1294 XML_Bool oldns_triplets
;
1295 /* Note that the new parser shares the same hash secret as the old
1296 parser, so that dtdCopy and copyEntityTable can lookup values
1297 from hash tables associated with either parser without us having
1298 to worry which hash secrets each table has.
1300 unsigned long oldhash_secret_salt
;
1302 /* Validate the oldParser parameter before we pull everything out of it */
1303 if (oldParser
== NULL
)
1306 /* Stash the original parser contents on the stack */
1307 oldDtd
= parser
->m_dtd
;
1308 oldStartElementHandler
= parser
->m_startElementHandler
;
1309 oldEndElementHandler
= parser
->m_endElementHandler
;
1310 oldCharacterDataHandler
= parser
->m_characterDataHandler
;
1311 oldProcessingInstructionHandler
= parser
->m_processingInstructionHandler
;
1312 oldCommentHandler
= parser
->m_commentHandler
;
1313 oldStartCdataSectionHandler
= parser
->m_startCdataSectionHandler
;
1314 oldEndCdataSectionHandler
= parser
->m_endCdataSectionHandler
;
1315 oldDefaultHandler
= parser
->m_defaultHandler
;
1316 oldUnparsedEntityDeclHandler
= parser
->m_unparsedEntityDeclHandler
;
1317 oldNotationDeclHandler
= parser
->m_notationDeclHandler
;
1318 oldStartNamespaceDeclHandler
= parser
->m_startNamespaceDeclHandler
;
1319 oldEndNamespaceDeclHandler
= parser
->m_endNamespaceDeclHandler
;
1320 oldNotStandaloneHandler
= parser
->m_notStandaloneHandler
;
1321 oldExternalEntityRefHandler
= parser
->m_externalEntityRefHandler
;
1322 oldSkippedEntityHandler
= parser
->m_skippedEntityHandler
;
1323 oldUnknownEncodingHandler
= parser
->m_unknownEncodingHandler
;
1324 oldElementDeclHandler
= parser
->m_elementDeclHandler
;
1325 oldAttlistDeclHandler
= parser
->m_attlistDeclHandler
;
1326 oldEntityDeclHandler
= parser
->m_entityDeclHandler
;
1327 oldXmlDeclHandler
= parser
->m_xmlDeclHandler
;
1328 oldDeclElementType
= parser
->m_declElementType
;
1330 oldUserData
= parser
->m_userData
;
1331 oldHandlerArg
= parser
->m_handlerArg
;
1332 oldDefaultExpandInternalEntities
= parser
->m_defaultExpandInternalEntities
;
1333 oldExternalEntityRefHandlerArg
= parser
->m_externalEntityRefHandlerArg
;
1335 oldParamEntityParsing
= parser
->m_paramEntityParsing
;
1336 oldInEntityValue
= parser
->m_prologState
.inEntityValue
;
1338 oldns_triplets
= parser
->m_ns_triplets
;
1339 /* Note that the new parser shares the same hash secret as the old
1340 parser, so that dtdCopy and copyEntityTable can lookup values
1341 from hash tables associated with either parser without us having
1342 to worry which hash secrets each table has.
1344 oldhash_secret_salt
= parser
->m_hash_secret_salt
;
1349 #endif /* XML_DTD */
1351 /* Note that the magical uses of the pre-processor to make field
1352 access look more like C++ require that `parser' be overwritten
1353 here. This makes this function more painful to follow than it
1357 XML_Char tmp
[2] = {parser
->m_namespaceSeparator
, 0};
1358 parser
= parserCreate(encodingName
, &parser
->m_mem
, tmp
, newDtd
);
1360 parser
= parserCreate(encodingName
, &parser
->m_mem
, NULL
, newDtd
);
1366 parser
->m_startElementHandler
= oldStartElementHandler
;
1367 parser
->m_endElementHandler
= oldEndElementHandler
;
1368 parser
->m_characterDataHandler
= oldCharacterDataHandler
;
1369 parser
->m_processingInstructionHandler
= oldProcessingInstructionHandler
;
1370 parser
->m_commentHandler
= oldCommentHandler
;
1371 parser
->m_startCdataSectionHandler
= oldStartCdataSectionHandler
;
1372 parser
->m_endCdataSectionHandler
= oldEndCdataSectionHandler
;
1373 parser
->m_defaultHandler
= oldDefaultHandler
;
1374 parser
->m_unparsedEntityDeclHandler
= oldUnparsedEntityDeclHandler
;
1375 parser
->m_notationDeclHandler
= oldNotationDeclHandler
;
1376 parser
->m_startNamespaceDeclHandler
= oldStartNamespaceDeclHandler
;
1377 parser
->m_endNamespaceDeclHandler
= oldEndNamespaceDeclHandler
;
1378 parser
->m_notStandaloneHandler
= oldNotStandaloneHandler
;
1379 parser
->m_externalEntityRefHandler
= oldExternalEntityRefHandler
;
1380 parser
->m_skippedEntityHandler
= oldSkippedEntityHandler
;
1381 parser
->m_unknownEncodingHandler
= oldUnknownEncodingHandler
;
1382 parser
->m_elementDeclHandler
= oldElementDeclHandler
;
1383 parser
->m_attlistDeclHandler
= oldAttlistDeclHandler
;
1384 parser
->m_entityDeclHandler
= oldEntityDeclHandler
;
1385 parser
->m_xmlDeclHandler
= oldXmlDeclHandler
;
1386 parser
->m_declElementType
= oldDeclElementType
;
1387 parser
->m_userData
= oldUserData
;
1388 if (oldUserData
== oldHandlerArg
)
1389 parser
->m_handlerArg
= parser
->m_userData
;
1391 parser
->m_handlerArg
= parser
;
1392 if (oldExternalEntityRefHandlerArg
!= oldParser
)
1393 parser
->m_externalEntityRefHandlerArg
= oldExternalEntityRefHandlerArg
;
1394 parser
->m_defaultExpandInternalEntities
= oldDefaultExpandInternalEntities
;
1395 parser
->m_ns_triplets
= oldns_triplets
;
1396 parser
->m_hash_secret_salt
= oldhash_secret_salt
;
1397 parser
->m_parentParser
= oldParser
;
1399 parser
->m_paramEntityParsing
= oldParamEntityParsing
;
1400 parser
->m_prologState
.inEntityValue
= oldInEntityValue
;
1402 #endif /* XML_DTD */
1403 if (! dtdCopy(oldParser
, parser
->m_dtd
, oldDtd
, &parser
->m_mem
)
1404 || ! setContext(parser
, context
)) {
1405 XML_ParserFree(parser
);
1408 parser
->m_processor
= externalEntityInitProcessor
;
1411 /* The DTD instance referenced by parser->m_dtd is shared between the
1412 document's root parser and external PE parsers, therefore one does not
1413 need to call setContext. In addition, one also *must* not call
1414 setContext, because this would overwrite existing prefix->binding
1415 pointers in parser->m_dtd with ones that get destroyed with the external
1416 PE parser. This would leave those prefixes with dangling pointers.
1418 parser
->m_isParamEntity
= XML_TRUE
;
1419 XmlPrologStateInitExternalEntity(&parser
->m_prologState
);
1420 parser
->m_processor
= externalParEntInitProcessor
;
1422 #endif /* XML_DTD */
1426 static void FASTCALL
1427 destroyBindings(BINDING
*bindings
, XML_Parser parser
) {
1429 BINDING
*b
= bindings
;
1432 bindings
= b
->nextTagBinding
;
1433 FREE(parser
, b
->uri
);
1439 XML_ParserFree(XML_Parser parser
) {
1441 OPEN_INTERNAL_ENTITY
*entityList
;
1444 /* free m_tagStack and m_freeTagList */
1445 tagList
= parser
->m_tagStack
;
1448 if (tagList
== NULL
) {
1449 if (parser
->m_freeTagList
== NULL
)
1451 tagList
= parser
->m_freeTagList
;
1452 parser
->m_freeTagList
= NULL
;
1455 tagList
= tagList
->parent
;
1456 FREE(parser
, p
->buf
);
1457 destroyBindings(p
->bindings
, parser
);
1460 /* free m_openInternalEntities and m_freeInternalEntities */
1461 entityList
= parser
->m_openInternalEntities
;
1463 OPEN_INTERNAL_ENTITY
*openEntity
;
1464 if (entityList
== NULL
) {
1465 if (parser
->m_freeInternalEntities
== NULL
)
1467 entityList
= parser
->m_freeInternalEntities
;
1468 parser
->m_freeInternalEntities
= NULL
;
1470 openEntity
= entityList
;
1471 entityList
= entityList
->next
;
1472 FREE(parser
, openEntity
);
1475 destroyBindings(parser
->m_freeBindingList
, parser
);
1476 destroyBindings(parser
->m_inheritedBindings
, parser
);
1477 poolDestroy(&parser
->m_tempPool
);
1478 poolDestroy(&parser
->m_temp2Pool
);
1479 FREE(parser
, (void *)parser
->m_protocolEncodingName
);
1481 /* external parameter entity parsers share the DTD structure
1482 parser->m_dtd with the root parser, so we must not destroy it
1484 if (! parser
->m_isParamEntity
&& parser
->m_dtd
)
1487 #endif /* XML_DTD */
1488 dtdDestroy(parser
->m_dtd
, (XML_Bool
)! parser
->m_parentParser
,
1490 FREE(parser
, (void *)parser
->m_atts
);
1491 #ifdef XML_ATTR_INFO
1492 FREE(parser
, (void *)parser
->m_attInfo
);
1494 FREE(parser
, parser
->m_groupConnector
);
1495 FREE(parser
, parser
->m_buffer
);
1496 FREE(parser
, parser
->m_dataBuf
);
1497 FREE(parser
, parser
->m_nsAtts
);
1498 FREE(parser
, parser
->m_unknownEncodingMem
);
1499 if (parser
->m_unknownEncodingRelease
)
1500 parser
->m_unknownEncodingRelease(parser
->m_unknownEncodingData
);
1501 FREE(parser
, parser
);
1505 XML_UseParserAsHandlerArg(XML_Parser parser
) {
1507 parser
->m_handlerArg
= parser
;
1510 enum XML_Error XMLCALL
1511 XML_UseForeignDTD(XML_Parser parser
, XML_Bool useDTD
) {
1513 return XML_ERROR_INVALID_ARGUMENT
;
1515 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1516 if (parser
->m_parsingStatus
.parsing
== XML_PARSING
1517 || parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
)
1518 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
;
1519 parser
->m_useForeignDTD
= useDTD
;
1520 return XML_ERROR_NONE
;
1523 return XML_ERROR_FEATURE_REQUIRES_XML_DTD
;
1528 XML_SetReturnNSTriplet(XML_Parser parser
, int do_nst
) {
1531 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1532 if (parser
->m_parsingStatus
.parsing
== XML_PARSING
1533 || parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
)
1535 parser
->m_ns_triplets
= do_nst
? XML_TRUE
: XML_FALSE
;
1539 XML_SetUserData(XML_Parser parser
, void *p
) {
1542 if (parser
->m_handlerArg
== parser
->m_userData
)
1543 parser
->m_handlerArg
= parser
->m_userData
= p
;
1545 parser
->m_userData
= p
;
1548 enum XML_Status XMLCALL
1549 XML_SetBase(XML_Parser parser
, const XML_Char
*p
) {
1551 return XML_STATUS_ERROR
;
1553 p
= poolCopyString(&parser
->m_dtd
->pool
, p
);
1555 return XML_STATUS_ERROR
;
1556 parser
->m_curBase
= p
;
1558 parser
->m_curBase
= NULL
;
1559 return XML_STATUS_OK
;
1562 const XML_Char
*XMLCALL
1563 XML_GetBase(XML_Parser parser
) {
1566 return parser
->m_curBase
;
1570 XML_GetSpecifiedAttributeCount(XML_Parser parser
) {
1573 return parser
->m_nSpecifiedAtts
;
1577 XML_GetIdAttributeIndex(XML_Parser parser
) {
1580 return parser
->m_idAttIndex
;
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array (m_attInfo), or NULL for a
   NULL parser.  Only built with XML_ATTR_INFO.
*/
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser == NULL)
    return NULL;
  return parser->m_attInfo;
}
#endif
1593 XML_SetElementHandler(XML_Parser parser
, XML_StartElementHandler start
,
1594 XML_EndElementHandler end
) {
1597 parser
->m_startElementHandler
= start
;
1598 parser
->m_endElementHandler
= end
;
1602 XML_SetStartElementHandler(XML_Parser parser
, XML_StartElementHandler start
) {
1604 parser
->m_startElementHandler
= start
;
1608 XML_SetEndElementHandler(XML_Parser parser
, XML_EndElementHandler end
) {
1610 parser
->m_endElementHandler
= end
;
1614 XML_SetCharacterDataHandler(XML_Parser parser
,
1615 XML_CharacterDataHandler handler
) {
1617 parser
->m_characterDataHandler
= handler
;
1621 XML_SetProcessingInstructionHandler(XML_Parser parser
,
1622 XML_ProcessingInstructionHandler handler
) {
1624 parser
->m_processingInstructionHandler
= handler
;
1628 XML_SetCommentHandler(XML_Parser parser
, XML_CommentHandler handler
) {
1630 parser
->m_commentHandler
= handler
;
1634 XML_SetCdataSectionHandler(XML_Parser parser
,
1635 XML_StartCdataSectionHandler start
,
1636 XML_EndCdataSectionHandler end
) {
1639 parser
->m_startCdataSectionHandler
= start
;
1640 parser
->m_endCdataSectionHandler
= end
;
1644 XML_SetStartCdataSectionHandler(XML_Parser parser
,
1645 XML_StartCdataSectionHandler start
) {
1647 parser
->m_startCdataSectionHandler
= start
;
1651 XML_SetEndCdataSectionHandler(XML_Parser parser
,
1652 XML_EndCdataSectionHandler end
) {
1654 parser
->m_endCdataSectionHandler
= end
;
1658 XML_SetDefaultHandler(XML_Parser parser
, XML_DefaultHandler handler
) {
1661 parser
->m_defaultHandler
= handler
;
1662 parser
->m_defaultExpandInternalEntities
= XML_FALSE
;
1666 XML_SetDefaultHandlerExpand(XML_Parser parser
, XML_DefaultHandler handler
) {
1669 parser
->m_defaultHandler
= handler
;
1670 parser
->m_defaultExpandInternalEntities
= XML_TRUE
;
1674 XML_SetDoctypeDeclHandler(XML_Parser parser
, XML_StartDoctypeDeclHandler start
,
1675 XML_EndDoctypeDeclHandler end
) {
1678 parser
->m_startDoctypeDeclHandler
= start
;
1679 parser
->m_endDoctypeDeclHandler
= end
;
1683 XML_SetStartDoctypeDeclHandler(XML_Parser parser
,
1684 XML_StartDoctypeDeclHandler start
) {
1686 parser
->m_startDoctypeDeclHandler
= start
;
1690 XML_SetEndDoctypeDeclHandler(XML_Parser parser
, XML_EndDoctypeDeclHandler end
) {
1692 parser
->m_endDoctypeDeclHandler
= end
;
1696 XML_SetUnparsedEntityDeclHandler(XML_Parser parser
,
1697 XML_UnparsedEntityDeclHandler handler
) {
1699 parser
->m_unparsedEntityDeclHandler
= handler
;
1703 XML_SetNotationDeclHandler(XML_Parser parser
, XML_NotationDeclHandler handler
) {
1705 parser
->m_notationDeclHandler
= handler
;
1709 XML_SetNamespaceDeclHandler(XML_Parser parser
,
1710 XML_StartNamespaceDeclHandler start
,
1711 XML_EndNamespaceDeclHandler end
) {
1714 parser
->m_startNamespaceDeclHandler
= start
;
1715 parser
->m_endNamespaceDeclHandler
= end
;
1719 XML_SetStartNamespaceDeclHandler(XML_Parser parser
,
1720 XML_StartNamespaceDeclHandler start
) {
1722 parser
->m_startNamespaceDeclHandler
= start
;
1726 XML_SetEndNamespaceDeclHandler(XML_Parser parser
,
1727 XML_EndNamespaceDeclHandler end
) {
1729 parser
->m_endNamespaceDeclHandler
= end
;
1733 XML_SetNotStandaloneHandler(XML_Parser parser
,
1734 XML_NotStandaloneHandler handler
) {
1736 parser
->m_notStandaloneHandler
= handler
;
1740 XML_SetExternalEntityRefHandler(XML_Parser parser
,
1741 XML_ExternalEntityRefHandler handler
) {
1743 parser
->m_externalEntityRefHandler
= handler
;
1747 XML_SetExternalEntityRefHandlerArg(XML_Parser parser
, void *arg
) {
1751 parser
->m_externalEntityRefHandlerArg
= (XML_Parser
)arg
;
1753 parser
->m_externalEntityRefHandlerArg
= parser
;
1757 XML_SetSkippedEntityHandler(XML_Parser parser
,
1758 XML_SkippedEntityHandler handler
) {
1760 parser
->m_skippedEntityHandler
= handler
;
1764 XML_SetUnknownEncodingHandler(XML_Parser parser
,
1765 XML_UnknownEncodingHandler handler
, void *data
) {
1768 parser
->m_unknownEncodingHandler
= handler
;
1769 parser
->m_unknownEncodingHandlerData
= data
;
1773 XML_SetElementDeclHandler(XML_Parser parser
, XML_ElementDeclHandler eldecl
) {
1775 parser
->m_elementDeclHandler
= eldecl
;
1779 XML_SetAttlistDeclHandler(XML_Parser parser
, XML_AttlistDeclHandler attdecl
) {
1781 parser
->m_attlistDeclHandler
= attdecl
;
1785 XML_SetEntityDeclHandler(XML_Parser parser
, XML_EntityDeclHandler handler
) {
1787 parser
->m_entityDeclHandler
= handler
;
1791 XML_SetXmlDeclHandler(XML_Parser parser
, XML_XmlDeclHandler handler
) {
1793 parser
->m_xmlDeclHandler
= handler
;
1797 XML_SetParamEntityParsing(XML_Parser parser
,
1798 enum XML_ParamEntityParsing peParsing
) {
1801 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1802 if (parser
->m_parsingStatus
.parsing
== XML_PARSING
1803 || parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
)
1806 parser
->m_paramEntityParsing
= peParsing
;
1809 return peParsing
== XML_PARAM_ENTITY_PARSING_NEVER
;
1814 XML_SetHashSalt(XML_Parser parser
, unsigned long hash_salt
) {
1817 if (parser
->m_parentParser
)
1818 return XML_SetHashSalt(parser
->m_parentParser
, hash_salt
);
1819 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1820 if (parser
->m_parsingStatus
.parsing
== XML_PARSING
1821 || parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
)
1823 parser
->m_hash_secret_salt
= hash_salt
;
1827 enum XML_Status XMLCALL
1828 XML_Parse(XML_Parser parser
, const char *s
, int len
, int isFinal
) {
1829 if ((parser
== NULL
) || (len
< 0) || ((s
== NULL
) && (len
!= 0))) {
1831 parser
->m_errorCode
= XML_ERROR_INVALID_ARGUMENT
;
1832 return XML_STATUS_ERROR
;
1834 switch (parser
->m_parsingStatus
.parsing
) {
1836 parser
->m_errorCode
= XML_ERROR_SUSPENDED
;
1837 return XML_STATUS_ERROR
;
1839 parser
->m_errorCode
= XML_ERROR_FINISHED
;
1840 return XML_STATUS_ERROR
;
1841 case XML_INITIALIZED
:
1842 if (parser
->m_parentParser
== NULL
&& ! startParsing(parser
)) {
1843 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
1844 return XML_STATUS_ERROR
;
1848 parser
->m_parsingStatus
.parsing
= XML_PARSING
;
1852 parser
->m_parsingStatus
.finalBuffer
= (XML_Bool
)isFinal
;
1854 return XML_STATUS_OK
;
1855 parser
->m_positionPtr
= parser
->m_bufferPtr
;
1856 parser
->m_parseEndPtr
= parser
->m_bufferEnd
;
1858 /* If data are left over from last buffer, and we now know that these
1859 data are the final chunk of input, then we have to check them again
1860 to detect errors based on that fact.
1863 = parser
->m_processor(parser
, parser
->m_bufferPtr
,
1864 parser
->m_parseEndPtr
, &parser
->m_bufferPtr
);
1866 if (parser
->m_errorCode
== XML_ERROR_NONE
) {
1867 switch (parser
->m_parsingStatus
.parsing
) {
1869 /* It is hard to be certain, but it seems that this case
1870 * cannot occur. This code is cleaning up a previous parse
1871 * with no new data (since len == 0). Changing the parsing
1872 * state requires getting to execute a handler function, and
1873 * there doesn't seem to be an opportunity for that while in
1874 * this circumstance.
1876 * Given the uncertainty, we retain the code but exclude it
1877 * from coverage tests.
1881 XmlUpdatePosition(parser
->m_encoding
, parser
->m_positionPtr
,
1882 parser
->m_bufferPtr
, &parser
->m_position
);
1883 parser
->m_positionPtr
= parser
->m_bufferPtr
;
1884 return XML_STATUS_SUSPENDED
;
1885 /* LCOV_EXCL_STOP */
1886 case XML_INITIALIZED
:
1888 parser
->m_parsingStatus
.parsing
= XML_FINISHED
;
1891 return XML_STATUS_OK
;
1894 parser
->m_eventEndPtr
= parser
->m_eventPtr
;
1895 parser
->m_processor
= errorProcessor
;
1896 return XML_STATUS_ERROR
;
1898 #ifndef XML_CONTEXT_BYTES
1899 else if (parser
->m_bufferPtr
== parser
->m_bufferEnd
) {
1902 enum XML_Status result
;
1903 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1904 if ((XML_Size
)len
> ((XML_Size
)-1) / 2 - parser
->m_parseEndByteIndex
) {
1905 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
1906 parser
->m_eventPtr
= parser
->m_eventEndPtr
= NULL
;
1907 parser
->m_processor
= errorProcessor
;
1908 return XML_STATUS_ERROR
;
1910 parser
->m_parseEndByteIndex
+= len
;
1911 parser
->m_positionPtr
= s
;
1912 parser
->m_parsingStatus
.finalBuffer
= (XML_Bool
)isFinal
;
1915 = parser
->m_processor(parser
, s
, parser
->m_parseEndPtr
= s
+ len
, &end
);
1917 if (parser
->m_errorCode
!= XML_ERROR_NONE
) {
1918 parser
->m_eventEndPtr
= parser
->m_eventPtr
;
1919 parser
->m_processor
= errorProcessor
;
1920 return XML_STATUS_ERROR
;
1922 switch (parser
->m_parsingStatus
.parsing
) {
1924 result
= XML_STATUS_SUSPENDED
;
1926 case XML_INITIALIZED
:
1929 parser
->m_parsingStatus
.parsing
= XML_FINISHED
;
1930 return XML_STATUS_OK
;
1934 result
= XML_STATUS_OK
;
1938 XmlUpdatePosition(parser
->m_encoding
, parser
->m_positionPtr
, end
,
1939 &parser
->m_position
);
1940 nLeftOver
= s
+ len
- end
;
1942 if (parser
->m_buffer
== NULL
1943 || nLeftOver
> parser
->m_bufferLim
- parser
->m_buffer
) {
1944 /* avoid _signed_ integer overflow */
1946 const int bytesToAllocate
= (int)((unsigned)len
* 2U);
1947 if (bytesToAllocate
> 0) {
1948 temp
= (char *)REALLOC(parser
, parser
->m_buffer
, bytesToAllocate
);
1951 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
1952 parser
->m_eventPtr
= parser
->m_eventEndPtr
= NULL
;
1953 parser
->m_processor
= errorProcessor
;
1954 return XML_STATUS_ERROR
;
1956 parser
->m_buffer
= temp
;
1957 parser
->m_bufferLim
= parser
->m_buffer
+ bytesToAllocate
;
1959 memcpy(parser
->m_buffer
, end
, nLeftOver
);
1961 parser
->m_bufferPtr
= parser
->m_buffer
;
1962 parser
->m_bufferEnd
= parser
->m_buffer
+ nLeftOver
;
1963 parser
->m_positionPtr
= parser
->m_bufferPtr
;
1964 parser
->m_parseEndPtr
= parser
->m_bufferEnd
;
1965 parser
->m_eventPtr
= parser
->m_bufferPtr
;
1966 parser
->m_eventEndPtr
= parser
->m_bufferPtr
;
1969 #endif /* not defined XML_CONTEXT_BYTES */
1971 void *buff
= XML_GetBuffer(parser
, len
);
1973 return XML_STATUS_ERROR
;
1975 memcpy(buff
, s
, len
);
1976 return XML_ParseBuffer(parser
, len
, isFinal
);
1981 enum XML_Status XMLCALL
1982 XML_ParseBuffer(XML_Parser parser
, int len
, int isFinal
) {
1984 enum XML_Status result
= XML_STATUS_OK
;
1987 return XML_STATUS_ERROR
;
1988 switch (parser
->m_parsingStatus
.parsing
) {
1990 parser
->m_errorCode
= XML_ERROR_SUSPENDED
;
1991 return XML_STATUS_ERROR
;
1993 parser
->m_errorCode
= XML_ERROR_FINISHED
;
1994 return XML_STATUS_ERROR
;
1995 case XML_INITIALIZED
:
1996 /* Has someone called XML_GetBuffer successfully before? */
1997 if (! parser
->m_bufferPtr
) {
1998 parser
->m_errorCode
= XML_ERROR_NO_BUFFER
;
1999 return XML_STATUS_ERROR
;
2002 if (parser
->m_parentParser
== NULL
&& ! startParsing(parser
)) {
2003 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
2004 return XML_STATUS_ERROR
;
2008 parser
->m_parsingStatus
.parsing
= XML_PARSING
;
2011 start
= parser
->m_bufferPtr
;
2012 parser
->m_positionPtr
= start
;
2013 parser
->m_bufferEnd
+= len
;
2014 parser
->m_parseEndPtr
= parser
->m_bufferEnd
;
2015 parser
->m_parseEndByteIndex
+= len
;
2016 parser
->m_parsingStatus
.finalBuffer
= (XML_Bool
)isFinal
;
2018 parser
->m_errorCode
= parser
->m_processor(
2019 parser
, start
, parser
->m_parseEndPtr
, &parser
->m_bufferPtr
);
2021 if (parser
->m_errorCode
!= XML_ERROR_NONE
) {
2022 parser
->m_eventEndPtr
= parser
->m_eventPtr
;
2023 parser
->m_processor
= errorProcessor
;
2024 return XML_STATUS_ERROR
;
2026 switch (parser
->m_parsingStatus
.parsing
) {
2028 result
= XML_STATUS_SUSPENDED
;
2030 case XML_INITIALIZED
:
2033 parser
->m_parsingStatus
.parsing
= XML_FINISHED
;
2036 default:; /* should not happen */
2040 XmlUpdatePosition(parser
->m_encoding
, parser
->m_positionPtr
,
2041 parser
->m_bufferPtr
, &parser
->m_position
);
2042 parser
->m_positionPtr
= parser
->m_bufferPtr
;
2047 XML_GetBuffer(XML_Parser parser
, int len
) {
2051 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
2054 switch (parser
->m_parsingStatus
.parsing
) {
2056 parser
->m_errorCode
= XML_ERROR_SUSPENDED
;
2059 parser
->m_errorCode
= XML_ERROR_FINISHED
;
2064 if (len
> EXPAT_SAFE_PTR_DIFF(parser
->m_bufferLim
, parser
->m_bufferEnd
)) {
2065 #ifdef XML_CONTEXT_BYTES
2067 #endif /* defined XML_CONTEXT_BYTES */
2068 /* Do not invoke signed arithmetic overflow: */
2069 int neededSize
= (int)((unsigned)len
2070 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2071 parser
->m_bufferEnd
, parser
->m_bufferPtr
));
2072 if (neededSize
< 0) {
2073 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
2076 #ifdef XML_CONTEXT_BYTES
2077 keep
= (int)EXPAT_SAFE_PTR_DIFF(parser
->m_bufferPtr
, parser
->m_buffer
);
2078 if (keep
> XML_CONTEXT_BYTES
)
2079 keep
= XML_CONTEXT_BYTES
;
2080 /* Detect and prevent integer overflow */
2081 if (keep
> INT_MAX
- neededSize
) {
2082 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
2086 #endif /* defined XML_CONTEXT_BYTES */
2088 <= EXPAT_SAFE_PTR_DIFF(parser
->m_bufferLim
, parser
->m_buffer
)) {
2089 #ifdef XML_CONTEXT_BYTES
2090 if (keep
< EXPAT_SAFE_PTR_DIFF(parser
->m_bufferPtr
, parser
->m_buffer
)) {
2092 = (int)EXPAT_SAFE_PTR_DIFF(parser
->m_bufferPtr
, parser
->m_buffer
)
2094 /* The buffer pointers cannot be NULL here; we have at least some bytes
2096 memmove(parser
->m_buffer
, &parser
->m_buffer
[offset
],
2097 parser
->m_bufferEnd
- parser
->m_bufferPtr
+ keep
);
2098 parser
->m_bufferEnd
-= offset
;
2099 parser
->m_bufferPtr
-= offset
;
2102 if (parser
->m_buffer
&& parser
->m_bufferPtr
) {
2103 memmove(parser
->m_buffer
, parser
->m_bufferPtr
,
2104 EXPAT_SAFE_PTR_DIFF(parser
->m_bufferEnd
, parser
->m_bufferPtr
));
2107 + EXPAT_SAFE_PTR_DIFF(parser
->m_bufferEnd
, parser
->m_bufferPtr
);
2108 parser
->m_bufferPtr
= parser
->m_buffer
;
2110 #endif /* not defined XML_CONTEXT_BYTES */
2114 = (int)EXPAT_SAFE_PTR_DIFF(parser
->m_bufferLim
, parser
->m_bufferPtr
);
2115 if (bufferSize
== 0)
2116 bufferSize
= INIT_BUFFER_SIZE
;
2118 /* Do not invoke signed arithmetic overflow: */
2119 bufferSize
= (int)(2U * (unsigned)bufferSize
);
2120 } while (bufferSize
< neededSize
&& bufferSize
> 0);
2121 if (bufferSize
<= 0) {
2122 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
2125 newBuf
= (char *)MALLOC(parser
, bufferSize
);
2127 parser
->m_errorCode
= XML_ERROR_NO_MEMORY
;
2130 parser
->m_bufferLim
= newBuf
+ bufferSize
;
2131 #ifdef XML_CONTEXT_BYTES
2132 if (parser
->m_bufferPtr
) {
2133 memcpy(newBuf
, &parser
->m_bufferPtr
[-keep
],
2134 EXPAT_SAFE_PTR_DIFF(parser
->m_bufferEnd
, parser
->m_bufferPtr
)
2136 FREE(parser
, parser
->m_buffer
);
2137 parser
->m_buffer
= newBuf
;
2140 + EXPAT_SAFE_PTR_DIFF(parser
->m_bufferEnd
, parser
->m_bufferPtr
)
2142 parser
->m_bufferPtr
= parser
->m_buffer
+ keep
;
2144 /* This must be a brand new buffer with no data in it yet */
2145 parser
->m_bufferEnd
= newBuf
;
2146 parser
->m_bufferPtr
= parser
->m_buffer
= newBuf
;
2149 if (parser
->m_bufferPtr
) {
2150 memcpy(newBuf
, parser
->m_bufferPtr
,
2151 EXPAT_SAFE_PTR_DIFF(parser
->m_bufferEnd
, parser
->m_bufferPtr
));
2152 FREE(parser
, parser
->m_buffer
);
2155 + EXPAT_SAFE_PTR_DIFF(parser
->m_bufferEnd
, parser
->m_bufferPtr
);
2157 /* This must be a brand new buffer with no data in it yet */
2158 parser
->m_bufferEnd
= newBuf
;
2160 parser
->m_bufferPtr
= parser
->m_buffer
= newBuf
;
2161 #endif /* not defined XML_CONTEXT_BYTES */
2163 parser
->m_eventPtr
= parser
->m_eventEndPtr
= NULL
;
2164 parser
->m_positionPtr
= NULL
;
2166 return parser
->m_bufferEnd
;
2169 enum XML_Status XMLCALL
2170 XML_StopParser(XML_Parser parser
, XML_Bool resumable
) {
2172 return XML_STATUS_ERROR
;
2173 switch (parser
->m_parsingStatus
.parsing
) {
2176 parser
->m_errorCode
= XML_ERROR_SUSPENDED
;
2177 return XML_STATUS_ERROR
;
2179 parser
->m_parsingStatus
.parsing
= XML_FINISHED
;
2182 parser
->m_errorCode
= XML_ERROR_FINISHED
;
2183 return XML_STATUS_ERROR
;
2187 if (parser
->m_isParamEntity
) {
2188 parser
->m_errorCode
= XML_ERROR_SUSPEND_PE
;
2189 return XML_STATUS_ERROR
;
2192 parser
->m_parsingStatus
.parsing
= XML_SUSPENDED
;
2194 parser
->m_parsingStatus
.parsing
= XML_FINISHED
;
2196 return XML_STATUS_OK
;
2199 enum XML_Status XMLCALL
2200 XML_ResumeParser(XML_Parser parser
) {
2201 enum XML_Status result
= XML_STATUS_OK
;
2204 return XML_STATUS_ERROR
;
2205 if (parser
->m_parsingStatus
.parsing
!= XML_SUSPENDED
) {
2206 parser
->m_errorCode
= XML_ERROR_NOT_SUSPENDED
;
2207 return XML_STATUS_ERROR
;
2209 parser
->m_parsingStatus
.parsing
= XML_PARSING
;
2211 parser
->m_errorCode
= parser
->m_processor(
2212 parser
, parser
->m_bufferPtr
, parser
->m_parseEndPtr
, &parser
->m_bufferPtr
);
2214 if (parser
->m_errorCode
!= XML_ERROR_NONE
) {
2215 parser
->m_eventEndPtr
= parser
->m_eventPtr
;
2216 parser
->m_processor
= errorProcessor
;
2217 return XML_STATUS_ERROR
;
2219 switch (parser
->m_parsingStatus
.parsing
) {
2221 result
= XML_STATUS_SUSPENDED
;
2223 case XML_INITIALIZED
:
2225 if (parser
->m_parsingStatus
.finalBuffer
) {
2226 parser
->m_parsingStatus
.parsing
= XML_FINISHED
;
2233 XmlUpdatePosition(parser
->m_encoding
, parser
->m_positionPtr
,
2234 parser
->m_bufferPtr
, &parser
->m_position
);
2235 parser
->m_positionPtr
= parser
->m_bufferPtr
;
2240 XML_GetParsingStatus(XML_Parser parser
, XML_ParsingStatus
*status
) {
2243 assert(status
!= NULL
);
2244 *status
= parser
->m_parsingStatus
;
2247 enum XML_Error XMLCALL
2248 XML_GetErrorCode(XML_Parser parser
) {
2250 return XML_ERROR_INVALID_ARGUMENT
;
2251 return parser
->m_errorCode
;
2255 XML_GetCurrentByteIndex(XML_Parser parser
) {
2258 if (parser
->m_eventPtr
)
2259 return (XML_Index
)(parser
->m_parseEndByteIndex
2260 - (parser
->m_parseEndPtr
- parser
->m_eventPtr
));
2265 XML_GetCurrentByteCount(XML_Parser parser
) {
2268 if (parser
->m_eventEndPtr
&& parser
->m_eventPtr
)
2269 return (int)(parser
->m_eventEndPtr
- parser
->m_eventPtr
);
2274 XML_GetInputContext(XML_Parser parser
, int *offset
, int *size
) {
2275 #ifdef XML_CONTEXT_BYTES
2278 if (parser
->m_eventPtr
&& parser
->m_buffer
) {
2280 *offset
= (int)(parser
->m_eventPtr
- parser
->m_buffer
);
2282 *size
= (int)(parser
->m_bufferEnd
- parser
->m_buffer
);
2283 return parser
->m_buffer
;
2289 #endif /* defined XML_CONTEXT_BYTES */
2290 return (const char *)0;
2294 XML_GetCurrentLineNumber(XML_Parser parser
) {
2297 if (parser
->m_eventPtr
&& parser
->m_eventPtr
>= parser
->m_positionPtr
) {
2298 XmlUpdatePosition(parser
->m_encoding
, parser
->m_positionPtr
,
2299 parser
->m_eventPtr
, &parser
->m_position
);
2300 parser
->m_positionPtr
= parser
->m_eventPtr
;
2302 return parser
->m_position
.lineNumber
+ 1;
2306 XML_GetCurrentColumnNumber(XML_Parser parser
) {
2309 if (parser
->m_eventPtr
&& parser
->m_eventPtr
>= parser
->m_positionPtr
) {
2310 XmlUpdatePosition(parser
->m_encoding
, parser
->m_positionPtr
,
2311 parser
->m_eventPtr
, &parser
->m_position
);
2312 parser
->m_positionPtr
= parser
->m_eventPtr
;
2314 return parser
->m_position
.columnNumber
;
2318 XML_FreeContentModel(XML_Parser parser
, XML_Content
*model
) {
2320 FREE(parser
, model
);
2324 XML_MemMalloc(XML_Parser parser
, size_t size
) {
2327 return MALLOC(parser
, size
);
2331 XML_MemRealloc(XML_Parser parser
, void *ptr
, size_t size
) {
2334 return REALLOC(parser
, ptr
, size
);
2338 XML_MemFree(XML_Parser parser
, void *ptr
) {
2344 XML_DefaultCurrent(XML_Parser parser
) {
2347 if (parser
->m_defaultHandler
) {
2348 if (parser
->m_openInternalEntities
)
2349 reportDefault(parser
, parser
->m_internalEncoding
,
2350 parser
->m_openInternalEntities
->internalEventPtr
,
2351 parser
->m_openInternalEntities
->internalEventEndPtr
);
2353 reportDefault(parser
, parser
->m_encoding
, parser
->m_eventPtr
,
2354 parser
->m_eventEndPtr
);
2358 const XML_LChar
*XMLCALL
2359 XML_ErrorString(enum XML_Error code
) {
2361 case XML_ERROR_NONE
:
2363 case XML_ERROR_NO_MEMORY
:
2364 return XML_L("out of memory");
2365 case XML_ERROR_SYNTAX
:
2366 return XML_L("syntax error");
2367 case XML_ERROR_NO_ELEMENTS
:
2368 return XML_L("no element found");
2369 case XML_ERROR_INVALID_TOKEN
:
2370 return XML_L("not well-formed (invalid token)");
2371 case XML_ERROR_UNCLOSED_TOKEN
:
2372 return XML_L("unclosed token");
2373 case XML_ERROR_PARTIAL_CHAR
:
2374 return XML_L("partial character");
2375 case XML_ERROR_TAG_MISMATCH
:
2376 return XML_L("mismatched tag");
2377 case XML_ERROR_DUPLICATE_ATTRIBUTE
:
2378 return XML_L("duplicate attribute");
2379 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT
:
2380 return XML_L("junk after document element");
2381 case XML_ERROR_PARAM_ENTITY_REF
:
2382 return XML_L("illegal parameter entity reference");
2383 case XML_ERROR_UNDEFINED_ENTITY
:
2384 return XML_L("undefined entity");
2385 case XML_ERROR_RECURSIVE_ENTITY_REF
:
2386 return XML_L("recursive entity reference");
2387 case XML_ERROR_ASYNC_ENTITY
:
2388 return XML_L("asynchronous entity");
2389 case XML_ERROR_BAD_CHAR_REF
:
2390 return XML_L("reference to invalid character number");
2391 case XML_ERROR_BINARY_ENTITY_REF
:
2392 return XML_L("reference to binary entity");
2393 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
:
2394 return XML_L("reference to external entity in attribute");
2395 case XML_ERROR_MISPLACED_XML_PI
:
2396 return XML_L("XML or text declaration not at start of entity");
2397 case XML_ERROR_UNKNOWN_ENCODING
:
2398 return XML_L("unknown encoding");
2399 case XML_ERROR_INCORRECT_ENCODING
:
2400 return XML_L("encoding specified in XML declaration is incorrect");
2401 case XML_ERROR_UNCLOSED_CDATA_SECTION
:
2402 return XML_L("unclosed CDATA section");
2403 case XML_ERROR_EXTERNAL_ENTITY_HANDLING
:
2404 return XML_L("error in processing external entity reference");
2405 case XML_ERROR_NOT_STANDALONE
:
2406 return XML_L("document is not standalone");
2407 case XML_ERROR_UNEXPECTED_STATE
:
2408 return XML_L("unexpected parser state - please send a bug report");
2409 case XML_ERROR_ENTITY_DECLARED_IN_PE
:
2410 return XML_L("entity declared in parameter entity");
2411 case XML_ERROR_FEATURE_REQUIRES_XML_DTD
:
2412 return XML_L("requested feature requires XML_DTD support in Expat");
2413 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
:
2414 return XML_L("cannot change setting once parsing has begun");
2415 /* Added in 1.95.7. */
2416 case XML_ERROR_UNBOUND_PREFIX
:
2417 return XML_L("unbound prefix");
2418 /* Added in 1.95.8. */
2419 case XML_ERROR_UNDECLARING_PREFIX
:
2420 return XML_L("must not undeclare prefix");
2421 case XML_ERROR_INCOMPLETE_PE
:
2422 return XML_L("incomplete markup in parameter entity");
2423 case XML_ERROR_XML_DECL
:
2424 return XML_L("XML declaration not well-formed");
2425 case XML_ERROR_TEXT_DECL
:
2426 return XML_L("text declaration not well-formed");
2427 case XML_ERROR_PUBLICID
:
2428 return XML_L("illegal character(s) in public id");
2429 case XML_ERROR_SUSPENDED
:
2430 return XML_L("parser suspended");
2431 case XML_ERROR_NOT_SUSPENDED
:
2432 return XML_L("parser not suspended");
2433 case XML_ERROR_ABORTED
:
2434 return XML_L("parsing aborted");
2435 case XML_ERROR_FINISHED
:
2436 return XML_L("parsing finished");
2437 case XML_ERROR_SUSPEND_PE
:
2438 return XML_L("cannot suspend in external parameter entity");
2439 /* Added in 2.0.0. */
2440 case XML_ERROR_RESERVED_PREFIX_XML
:
2442 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2443 case XML_ERROR_RESERVED_PREFIX_XMLNS
:
2444 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2445 case XML_ERROR_RESERVED_NAMESPACE_URI
:
2447 "prefix must not be bound to one of the reserved namespace names");
2448 /* Added in 2.2.5. */
2449 case XML_ERROR_INVALID_ARGUMENT
: /* Constant added in 2.2.1, already */
2450 return XML_L("invalid argument");
2451 /* Added in 2.3.0. */
2452 case XML_ERROR_NO_BUFFER
:
2454 "a successful prior call to function XML_GetBuffer is required");
2455 /* Added in 2.4.0. */
2456 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH
:
2458 "limit on input amplification factor (from DTD and entities) breached");
2463 const XML_LChar
*XMLCALL
2464 XML_ExpatVersion(void) {
2465 /* V1 is used to string-ize the version number. However, it would
2466 string-ize the actual version macro *names* unless we get them
2467 substituted before being passed to V1. CPP is defined to expand
2468 a macro, then rescan for more expansions. Thus, we use V2 to expand
2469 the version macros, then CPP will expand the resulting V1() macro
2470 with the correct numerals. */
2471 /* ### I'm assuming cpp is portable in this respect... */
2473 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2474 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2476 return V2(XML_MAJOR_VERSION
, XML_MINOR_VERSION
, XML_MICRO_VERSION
);
2482 XML_Expat_Version XMLCALL
2483 XML_ExpatVersionInfo(void) {
2484 XML_Expat_Version version
;
2486 version
.major
= XML_MAJOR_VERSION
;
2487 version
.minor
= XML_MINOR_VERSION
;
2488 version
.micro
= XML_MICRO_VERSION
;
2493 const XML_Feature
*XMLCALL
2494 XML_GetFeatureList(void) {
2495 static const XML_Feature features
[] = {
2496 {XML_FEATURE_SIZEOF_XML_CHAR
, XML_L("sizeof(XML_Char)"),
2498 {XML_FEATURE_SIZEOF_XML_LCHAR
, XML_L("sizeof(XML_LChar)"),
2501 {XML_FEATURE_UNICODE
, XML_L("XML_UNICODE"), 0},
2503 #ifdef XML_UNICODE_WCHAR_T
2504 {XML_FEATURE_UNICODE_WCHAR_T
, XML_L("XML_UNICODE_WCHAR_T"), 0},
2507 {XML_FEATURE_DTD
, XML_L("XML_DTD"), 0},
2509 #ifdef XML_CONTEXT_BYTES
2510 {XML_FEATURE_CONTEXT_BYTES
, XML_L("XML_CONTEXT_BYTES"),
2514 {XML_FEATURE_MIN_SIZE
, XML_L("XML_MIN_SIZE"), 0},
2517 {XML_FEATURE_NS
, XML_L("XML_NS"), 0},
2519 #ifdef XML_LARGE_SIZE
2520 {XML_FEATURE_LARGE_SIZE
, XML_L("XML_LARGE_SIZE"), 0},
2522 #ifdef XML_ATTR_INFO
2523 {XML_FEATURE_ATTR_INFO
, XML_L("XML_ATTR_INFO"), 0},
2526 /* Added in Expat 2.4.0. */
2527 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT
,
2528 XML_L("XML_BLAP_MAX_AMP"),
2530 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT
},
2531 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
,
2532 XML_L("XML_BLAP_ACT_THRES"),
2533 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
},
2535 {XML_FEATURE_END
, NULL
, 0}};
2542 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2543 XML_Parser parser
, float maximumAmplificationFactor
) {
2544 if ((parser
== NULL
) || (parser
->m_parentParser
!= NULL
)
2545 || isnan(maximumAmplificationFactor
)
2546 || (maximumAmplificationFactor
< 1.0f
)) {
2549 parser
->m_accounting
.maximumAmplificationFactor
= maximumAmplificationFactor
;
2554 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2555 XML_Parser parser
, unsigned long long activationThresholdBytes
) {
2556 if ((parser
== NULL
) || (parser
->m_parentParser
!= NULL
)) {
2559 parser
->m_accounting
.activationThresholdBytes
= activationThresholdBytes
;
2562 #endif /* XML_DTD */
2564 /* Initially tag->rawName always points into the parse buffer;
2565 for those TAG instances opened while the current parse buffer was
2566 processed, and not yet closed, we need to store tag->rawName in a more
2567 permanent location, since the parse buffer is about to be discarded.
2570 storeRawNames(XML_Parser parser
) {
2571 TAG
*tag
= parser
->m_tagStack
;
2574 int nameLen
= sizeof(XML_Char
) * (tag
->name
.strLen
+ 1);
2576 char *rawNameBuf
= tag
->buf
+ nameLen
;
2577 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2578 at the first entry that has already been copied; everything
2579 below it in the stack is already been accounted for in a
2580 previous call to this function.
2582 if (tag
->rawName
== rawNameBuf
)
2584 /* For re-use purposes we need to ensure that the
2585 size of tag->buf is a multiple of sizeof(XML_Char).
2587 rawNameLen
= ROUND_UP(tag
->rawNameLength
, sizeof(XML_Char
));
2588 /* Detect and prevent integer overflow. */
2589 if (rawNameLen
> (size_t)INT_MAX
- nameLen
)
2591 bufSize
= nameLen
+ (int)rawNameLen
;
2592 if (bufSize
> tag
->bufEnd
- tag
->buf
) {
2593 char *temp
= (char *)REALLOC(parser
, tag
->buf
, bufSize
);
2596 /* if tag->name.str points to tag->buf (only when namespace
2597 processing is off) then we have to update it
2599 if (tag
->name
.str
== (XML_Char
*)tag
->buf
)
2600 tag
->name
.str
= (XML_Char
*)temp
;
2601 /* if tag->name.localPart is set (when namespace processing is on)
2602 then update it as well, since it will always point into tag->buf
2604 if (tag
->name
.localPart
)
2606 = (XML_Char
*)temp
+ (tag
->name
.localPart
- (XML_Char
*)tag
->buf
);
2608 tag
->bufEnd
= temp
+ bufSize
;
2609 rawNameBuf
= temp
+ nameLen
;
2611 memcpy(rawNameBuf
, tag
->rawName
, tag
->rawNameLength
);
2612 tag
->rawName
= rawNameBuf
;
2618 static enum XML_Error PTRCALL
2619 contentProcessor(XML_Parser parser
, const char *start
, const char *end
,
2620 const char **endPtr
) {
2621 enum XML_Error result
= doContent(
2622 parser
, 0, parser
->m_encoding
, start
, end
, endPtr
,
2623 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
, XML_ACCOUNT_DIRECT
);
2624 if (result
== XML_ERROR_NONE
) {
2625 if (! storeRawNames(parser
))
2626 return XML_ERROR_NO_MEMORY
;
2631 static enum XML_Error PTRCALL
2632 externalEntityInitProcessor(XML_Parser parser
, const char *start
,
2633 const char *end
, const char **endPtr
) {
2634 enum XML_Error result
= initializeEncoding(parser
);
2635 if (result
!= XML_ERROR_NONE
)
2637 parser
->m_processor
= externalEntityInitProcessor2
;
2638 return externalEntityInitProcessor2(parser
, start
, end
, endPtr
);
2641 static enum XML_Error PTRCALL
2642 externalEntityInitProcessor2(XML_Parser parser
, const char *start
,
2643 const char *end
, const char **endPtr
) {
2644 const char *next
= start
; /* XmlContentTok doesn't always set the last arg */
2645 int tok
= XmlContentTok(parser
->m_encoding
, start
, end
, &next
);
2649 if (! accountingDiffTolerated(parser
, tok
, start
, next
, __LINE__
,
2650 XML_ACCOUNT_DIRECT
)) {
2651 accountingOnAbort(parser
);
2652 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
2654 #endif /* XML_DTD */
2656 /* If we are at the end of the buffer, this would cause the next stage,
2657 i.e. externalEntityInitProcessor3, to pass control directly to
2658 doContent (by detecting XML_TOK_NONE) without processing any xml text
2659 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2661 if (next
== end
&& ! parser
->m_parsingStatus
.finalBuffer
) {
2663 return XML_ERROR_NONE
;
2667 case XML_TOK_PARTIAL
:
2668 if (! parser
->m_parsingStatus
.finalBuffer
) {
2670 return XML_ERROR_NONE
;
2672 parser
->m_eventPtr
= start
;
2673 return XML_ERROR_UNCLOSED_TOKEN
;
2674 case XML_TOK_PARTIAL_CHAR
:
2675 if (! parser
->m_parsingStatus
.finalBuffer
) {
2677 return XML_ERROR_NONE
;
2679 parser
->m_eventPtr
= start
;
2680 return XML_ERROR_PARTIAL_CHAR
;
2682 parser
->m_processor
= externalEntityInitProcessor3
;
2683 return externalEntityInitProcessor3(parser
, start
, end
, endPtr
);
2686 static enum XML_Error PTRCALL
2687 externalEntityInitProcessor3(XML_Parser parser
, const char *start
,
2688 const char *end
, const char **endPtr
) {
2690 const char *next
= start
; /* XmlContentTok doesn't always set the last arg */
2691 parser
->m_eventPtr
= start
;
2692 tok
= XmlContentTok(parser
->m_encoding
, start
, end
, &next
);
2693 /* Note: These bytes are accounted later in:
2695 - externalEntityContentProcessor
2697 parser
->m_eventEndPtr
= next
;
2700 case XML_TOK_XML_DECL
: {
2701 enum XML_Error result
;
2702 result
= processXmlDecl(parser
, 1, start
, next
);
2703 if (result
!= XML_ERROR_NONE
)
2705 switch (parser
->m_parsingStatus
.parsing
) {
2708 return XML_ERROR_NONE
;
2710 return XML_ERROR_ABORTED
;
2715 case XML_TOK_PARTIAL
:
2716 if (! parser
->m_parsingStatus
.finalBuffer
) {
2718 return XML_ERROR_NONE
;
2720 return XML_ERROR_UNCLOSED_TOKEN
;
2721 case XML_TOK_PARTIAL_CHAR
:
2722 if (! parser
->m_parsingStatus
.finalBuffer
) {
2724 return XML_ERROR_NONE
;
2726 return XML_ERROR_PARTIAL_CHAR
;
2728 parser
->m_processor
= externalEntityContentProcessor
;
2729 parser
->m_tagLevel
= 1;
2730 return externalEntityContentProcessor(parser
, start
, end
, endPtr
);
2733 static enum XML_Error PTRCALL
2734 externalEntityContentProcessor(XML_Parser parser
, const char *start
,
2735 const char *end
, const char **endPtr
) {
2736 enum XML_Error result
2737 = doContent(parser
, 1, parser
->m_encoding
, start
, end
, endPtr
,
2738 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
,
2739 XML_ACCOUNT_ENTITY_EXPANSION
);
2740 if (result
== XML_ERROR_NONE
) {
2741 if (! storeRawNames(parser
))
2742 return XML_ERROR_NO_MEMORY
;
2747 static enum XML_Error
2748 doContent(XML_Parser parser
, int startTagLevel
, const ENCODING
*enc
,
2749 const char *s
, const char *end
, const char **nextPtr
,
2750 XML_Bool haveMore
, enum XML_Account account
) {
2751 /* save one level of indirection */
2752 DTD
*const dtd
= parser
->m_dtd
;
2754 const char **eventPP
;
2755 const char **eventEndPP
;
2756 if (enc
== parser
->m_encoding
) {
2757 eventPP
= &parser
->m_eventPtr
;
2758 eventEndPP
= &parser
->m_eventEndPtr
;
2760 eventPP
= &(parser
->m_openInternalEntities
->internalEventPtr
);
2761 eventEndPP
= &(parser
->m_openInternalEntities
->internalEventEndPtr
);
2766 const char *next
= s
; /* XmlContentTok doesn't always set the last arg */
2767 int tok
= XmlContentTok(enc
, s
, end
, &next
);
2769 const char *accountAfter
2770 = ((tok
== XML_TOK_TRAILING_RSQB
) || (tok
== XML_TOK_TRAILING_CR
))
2771 ? (haveMore
? s
/* i.e. 0 bytes */ : end
)
2773 if (! accountingDiffTolerated(parser
, tok
, s
, accountAfter
, __LINE__
,
2775 accountingOnAbort(parser
);
2776 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
2781 case XML_TOK_TRAILING_CR
:
2784 return XML_ERROR_NONE
;
2787 if (parser
->m_characterDataHandler
) {
2789 parser
->m_characterDataHandler(parser
->m_handlerArg
, &c
, 1);
2790 } else if (parser
->m_defaultHandler
)
2791 reportDefault(parser
, enc
, s
, end
);
2792 /* We are at the end of the final buffer, should we check for
2793 XML_SUSPENDED, XML_FINISHED?
2795 if (startTagLevel
== 0)
2796 return XML_ERROR_NO_ELEMENTS
;
2797 if (parser
->m_tagLevel
!= startTagLevel
)
2798 return XML_ERROR_ASYNC_ENTITY
;
2800 return XML_ERROR_NONE
;
2804 return XML_ERROR_NONE
;
2806 if (startTagLevel
> 0) {
2807 if (parser
->m_tagLevel
!= startTagLevel
)
2808 return XML_ERROR_ASYNC_ENTITY
;
2810 return XML_ERROR_NONE
;
2812 return XML_ERROR_NO_ELEMENTS
;
2813 case XML_TOK_INVALID
:
2815 return XML_ERROR_INVALID_TOKEN
;
2816 case XML_TOK_PARTIAL
:
2819 return XML_ERROR_NONE
;
2821 return XML_ERROR_UNCLOSED_TOKEN
;
2822 case XML_TOK_PARTIAL_CHAR
:
2825 return XML_ERROR_NONE
;
2827 return XML_ERROR_PARTIAL_CHAR
;
2828 case XML_TOK_ENTITY_REF
: {
2829 const XML_Char
*name
;
2831 XML_Char ch
= (XML_Char
)XmlPredefinedEntityName(
2832 enc
, s
+ enc
->minBytesPerChar
, next
- enc
->minBytesPerChar
);
2835 /* NOTE: We are replacing 4-6 characters original input for 1 character
2836 * so there is no amplification and hence recording without
2838 accountingDiffTolerated(parser
, tok
, (char *)&ch
,
2839 ((char *)&ch
) + sizeof(XML_Char
), __LINE__
,
2840 XML_ACCOUNT_ENTITY_EXPANSION
);
2841 #endif /* XML_DTD */
2842 if (parser
->m_characterDataHandler
)
2843 parser
->m_characterDataHandler(parser
->m_handlerArg
, &ch
, 1);
2844 else if (parser
->m_defaultHandler
)
2845 reportDefault(parser
, enc
, s
, next
);
2848 name
= poolStoreString(&dtd
->pool
, enc
, s
+ enc
->minBytesPerChar
,
2849 next
- enc
->minBytesPerChar
);
2851 return XML_ERROR_NO_MEMORY
;
2852 entity
= (ENTITY
*)lookup(parser
, &dtd
->generalEntities
, name
, 0);
2853 poolDiscard(&dtd
->pool
);
2854 /* First, determine if a check for an existing declaration is needed;
2855 if yes, check that the entity exists, and that it is internal,
2856 otherwise call the skipped entity or default handler.
2858 if (! dtd
->hasParamEntityRefs
|| dtd
->standalone
) {
2860 return XML_ERROR_UNDEFINED_ENTITY
;
2861 else if (! entity
->is_internal
)
2862 return XML_ERROR_ENTITY_DECLARED_IN_PE
;
2863 } else if (! entity
) {
2864 if (parser
->m_skippedEntityHandler
)
2865 parser
->m_skippedEntityHandler(parser
->m_handlerArg
, name
, 0);
2866 else if (parser
->m_defaultHandler
)
2867 reportDefault(parser
, enc
, s
, next
);
2871 return XML_ERROR_RECURSIVE_ENTITY_REF
;
2872 if (entity
->notation
)
2873 return XML_ERROR_BINARY_ENTITY_REF
;
2874 if (entity
->textPtr
) {
2875 enum XML_Error result
;
2876 if (! parser
->m_defaultExpandInternalEntities
) {
2877 if (parser
->m_skippedEntityHandler
)
2878 parser
->m_skippedEntityHandler(parser
->m_handlerArg
, entity
->name
,
2880 else if (parser
->m_defaultHandler
)
2881 reportDefault(parser
, enc
, s
, next
);
2884 result
= processInternalEntity(parser
, entity
, XML_FALSE
);
2885 if (result
!= XML_ERROR_NONE
)
2887 } else if (parser
->m_externalEntityRefHandler
) {
2888 const XML_Char
*context
;
2889 entity
->open
= XML_TRUE
;
2890 context
= getContext(parser
);
2891 entity
->open
= XML_FALSE
;
2893 return XML_ERROR_NO_MEMORY
;
2894 if (! parser
->m_externalEntityRefHandler(
2895 parser
->m_externalEntityRefHandlerArg
, context
, entity
->base
,
2896 entity
->systemId
, entity
->publicId
))
2897 return XML_ERROR_EXTERNAL_ENTITY_HANDLING
;
2898 poolDiscard(&parser
->m_tempPool
);
2899 } else if (parser
->m_defaultHandler
)
2900 reportDefault(parser
, enc
, s
, next
);
2903 case XML_TOK_START_TAG_NO_ATTS
:
2905 case XML_TOK_START_TAG_WITH_ATTS
: {
2907 enum XML_Error result
;
2909 if (parser
->m_freeTagList
) {
2910 tag
= parser
->m_freeTagList
;
2911 parser
->m_freeTagList
= parser
->m_freeTagList
->parent
;
2913 tag
= (TAG
*)MALLOC(parser
, sizeof(TAG
));
2915 return XML_ERROR_NO_MEMORY
;
2916 tag
->buf
= (char *)MALLOC(parser
, INIT_TAG_BUF_SIZE
);
2919 return XML_ERROR_NO_MEMORY
;
2921 tag
->bufEnd
= tag
->buf
+ INIT_TAG_BUF_SIZE
;
2923 tag
->bindings
= NULL
;
2924 tag
->parent
= parser
->m_tagStack
;
2925 parser
->m_tagStack
= tag
;
2926 tag
->name
.localPart
= NULL
;
2927 tag
->name
.prefix
= NULL
;
2928 tag
->rawName
= s
+ enc
->minBytesPerChar
;
2929 tag
->rawNameLength
= XmlNameLength(enc
, tag
->rawName
);
2930 ++parser
->m_tagLevel
;
2932 const char *rawNameEnd
= tag
->rawName
+ tag
->rawNameLength
;
2933 const char *fromPtr
= tag
->rawName
;
2934 toPtr
= (XML_Char
*)tag
->buf
;
2938 const enum XML_Convert_Result convert_res
2939 = XmlConvert(enc
, &fromPtr
, rawNameEnd
, (ICHAR
**)&toPtr
,
2940 (ICHAR
*)tag
->bufEnd
- 1);
2941 convLen
= (int)(toPtr
- (XML_Char
*)tag
->buf
);
2942 if ((fromPtr
>= rawNameEnd
)
2943 || (convert_res
== XML_CONVERT_INPUT_INCOMPLETE
)) {
2944 tag
->name
.strLen
= convLen
;
2947 bufSize
= (int)(tag
->bufEnd
- tag
->buf
) << 1;
2949 char *temp
= (char *)REALLOC(parser
, tag
->buf
, bufSize
);
2951 return XML_ERROR_NO_MEMORY
;
2953 tag
->bufEnd
= temp
+ bufSize
;
2954 toPtr
= (XML_Char
*)temp
+ convLen
;
2958 tag
->name
.str
= (XML_Char
*)tag
->buf
;
2959 *toPtr
= XML_T('\0');
2961 = storeAtts(parser
, enc
, s
, &(tag
->name
), &(tag
->bindings
), account
);
2964 if (parser
->m_startElementHandler
)
2965 parser
->m_startElementHandler(parser
->m_handlerArg
, tag
->name
.str
,
2966 (const XML_Char
**)parser
->m_atts
);
2967 else if (parser
->m_defaultHandler
)
2968 reportDefault(parser
, enc
, s
, next
);
2969 poolClear(&parser
->m_tempPool
);
2972 case XML_TOK_EMPTY_ELEMENT_NO_ATTS
:
2974 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS
: {
2975 const char *rawName
= s
+ enc
->minBytesPerChar
;
2976 enum XML_Error result
;
2977 BINDING
*bindings
= NULL
;
2978 XML_Bool noElmHandlers
= XML_TRUE
;
2980 name
.str
= poolStoreString(&parser
->m_tempPool
, enc
, rawName
,
2981 rawName
+ XmlNameLength(enc
, rawName
));
2983 return XML_ERROR_NO_MEMORY
;
2984 poolFinish(&parser
->m_tempPool
);
2985 result
= storeAtts(parser
, enc
, s
, &name
, &bindings
,
2986 XML_ACCOUNT_NONE
/* token spans whole start tag */);
2987 if (result
!= XML_ERROR_NONE
) {
2988 freeBindings(parser
, bindings
);
2991 poolFinish(&parser
->m_tempPool
);
2992 if (parser
->m_startElementHandler
) {
2993 parser
->m_startElementHandler(parser
->m_handlerArg
, name
.str
,
2994 (const XML_Char
**)parser
->m_atts
);
2995 noElmHandlers
= XML_FALSE
;
2997 if (parser
->m_endElementHandler
) {
2998 if (parser
->m_startElementHandler
)
2999 *eventPP
= *eventEndPP
;
3000 parser
->m_endElementHandler(parser
->m_handlerArg
, name
.str
);
3001 noElmHandlers
= XML_FALSE
;
3003 if (noElmHandlers
&& parser
->m_defaultHandler
)
3004 reportDefault(parser
, enc
, s
, next
);
3005 poolClear(&parser
->m_tempPool
);
3006 freeBindings(parser
, bindings
);
3008 if ((parser
->m_tagLevel
== 0)
3009 && (parser
->m_parsingStatus
.parsing
!= XML_FINISHED
)) {
3010 if (parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
)
3011 parser
->m_processor
= epilogProcessor
;
3013 return epilogProcessor(parser
, next
, end
, nextPtr
);
3016 case XML_TOK_END_TAG
:
3017 if (parser
->m_tagLevel
== startTagLevel
)
3018 return XML_ERROR_ASYNC_ENTITY
;
3021 const char *rawName
;
3022 TAG
*tag
= parser
->m_tagStack
;
3023 rawName
= s
+ enc
->minBytesPerChar
* 2;
3024 len
= XmlNameLength(enc
, rawName
);
3025 if (len
!= tag
->rawNameLength
3026 || memcmp(tag
->rawName
, rawName
, len
) != 0) {
3028 return XML_ERROR_TAG_MISMATCH
;
3030 parser
->m_tagStack
= tag
->parent
;
3031 tag
->parent
= parser
->m_freeTagList
;
3032 parser
->m_freeTagList
= tag
;
3033 --parser
->m_tagLevel
;
3034 if (parser
->m_endElementHandler
) {
3035 const XML_Char
*localPart
;
3036 const XML_Char
*prefix
;
3038 localPart
= tag
->name
.localPart
;
3039 if (parser
->m_ns
&& localPart
) {
3040 /* localPart and prefix may have been overwritten in
3041 tag->name.str, since this points to the binding->uri
3042 buffer which gets re-used; so we have to add them again
3044 uri
= (XML_Char
*)tag
->name
.str
+ tag
->name
.uriLen
;
3045 /* don't need to check for space - already done in storeAtts() */
3047 *uri
++ = *localPart
++;
3048 prefix
= (XML_Char
*)tag
->name
.prefix
;
3049 if (parser
->m_ns_triplets
&& prefix
) {
3050 *uri
++ = parser
->m_namespaceSeparator
;
3056 parser
->m_endElementHandler(parser
->m_handlerArg
, tag
->name
.str
);
3057 } else if (parser
->m_defaultHandler
)
3058 reportDefault(parser
, enc
, s
, next
);
3059 while (tag
->bindings
) {
3060 BINDING
*b
= tag
->bindings
;
3061 if (parser
->m_endNamespaceDeclHandler
)
3062 parser
->m_endNamespaceDeclHandler(parser
->m_handlerArg
,
3064 tag
->bindings
= tag
->bindings
->nextTagBinding
;
3065 b
->nextTagBinding
= parser
->m_freeBindingList
;
3066 parser
->m_freeBindingList
= b
;
3067 b
->prefix
->binding
= b
->prevPrefixBinding
;
3069 if ((parser
->m_tagLevel
== 0)
3070 && (parser
->m_parsingStatus
.parsing
!= XML_FINISHED
)) {
3071 if (parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
)
3072 parser
->m_processor
= epilogProcessor
;
3074 return epilogProcessor(parser
, next
, end
, nextPtr
);
3078 case XML_TOK_CHAR_REF
: {
3079 int n
= XmlCharRefNumber(enc
, s
);
3081 return XML_ERROR_BAD_CHAR_REF
;
3082 if (parser
->m_characterDataHandler
) {
3083 XML_Char buf
[XML_ENCODE_MAX
];
3084 parser
->m_characterDataHandler(parser
->m_handlerArg
, buf
,
3085 XmlEncode(n
, (ICHAR
*)buf
));
3086 } else if (parser
->m_defaultHandler
)
3087 reportDefault(parser
, enc
, s
, next
);
3089 case XML_TOK_XML_DECL
:
3090 return XML_ERROR_MISPLACED_XML_PI
;
3091 case XML_TOK_DATA_NEWLINE
:
3092 if (parser
->m_characterDataHandler
) {
3094 parser
->m_characterDataHandler(parser
->m_handlerArg
, &c
, 1);
3095 } else if (parser
->m_defaultHandler
)
3096 reportDefault(parser
, enc
, s
, next
);
3098 case XML_TOK_CDATA_SECT_OPEN
: {
3099 enum XML_Error result
;
3100 if (parser
->m_startCdataSectionHandler
)
3101 parser
->m_startCdataSectionHandler(parser
->m_handlerArg
);
3102 /* BEGIN disabled code */
3103 /* Suppose you doing a transformation on a document that involves
3104 changing only the character data. You set up a defaultHandler
3105 and a characterDataHandler. The defaultHandler simply copies
3106 characters through. The characterDataHandler does the
3107 transformation and writes the characters out escaping them as
3108 necessary. This case will fail to work if we leave out the
3109 following two lines (because & and < inside CDATA sections will
3110 be incorrectly escaped).
3112 However, now we have a start/endCdataSectionHandler, so it seems
3113 easier to let the user deal with this.
3115 else if (0 && parser
->m_characterDataHandler
)
3116 parser
->m_characterDataHandler(parser
->m_handlerArg
, parser
->m_dataBuf
,
3118 /* END disabled code */
3119 else if (parser
->m_defaultHandler
)
3120 reportDefault(parser
, enc
, s
, next
);
3122 = doCdataSection(parser
, enc
, &next
, end
, nextPtr
, haveMore
, account
);
3123 if (result
!= XML_ERROR_NONE
)
3126 parser
->m_processor
= cdataSectionProcessor
;
3130 case XML_TOK_TRAILING_RSQB
:
3133 return XML_ERROR_NONE
;
3135 if (parser
->m_characterDataHandler
) {
3136 if (MUST_CONVERT(enc
, s
)) {
3137 ICHAR
*dataPtr
= (ICHAR
*)parser
->m_dataBuf
;
3138 XmlConvert(enc
, &s
, end
, &dataPtr
, (ICHAR
*)parser
->m_dataBufEnd
);
3139 parser
->m_characterDataHandler(
3140 parser
->m_handlerArg
, parser
->m_dataBuf
,
3141 (int)(dataPtr
- (ICHAR
*)parser
->m_dataBuf
));
3143 parser
->m_characterDataHandler(
3144 parser
->m_handlerArg
, (XML_Char
*)s
,
3145 (int)((XML_Char
*)end
- (XML_Char
*)s
));
3146 } else if (parser
->m_defaultHandler
)
3147 reportDefault(parser
, enc
, s
, end
);
3148 /* We are at the end of the final buffer, should we check for
3149 XML_SUSPENDED, XML_FINISHED?
3151 if (startTagLevel
== 0) {
3153 return XML_ERROR_NO_ELEMENTS
;
3155 if (parser
->m_tagLevel
!= startTagLevel
) {
3157 return XML_ERROR_ASYNC_ENTITY
;
3160 return XML_ERROR_NONE
;
3161 case XML_TOK_DATA_CHARS
: {
3162 XML_CharacterDataHandler charDataHandler
= parser
->m_characterDataHandler
;
3163 if (charDataHandler
) {
3164 if (MUST_CONVERT(enc
, s
)) {
3166 ICHAR
*dataPtr
= (ICHAR
*)parser
->m_dataBuf
;
3167 const enum XML_Convert_Result convert_res
= XmlConvert(
3168 enc
, &s
, next
, &dataPtr
, (ICHAR
*)parser
->m_dataBufEnd
);
3170 charDataHandler(parser
->m_handlerArg
, parser
->m_dataBuf
,
3171 (int)(dataPtr
- (ICHAR
*)parser
->m_dataBuf
));
3172 if ((convert_res
== XML_CONVERT_COMPLETED
)
3173 || (convert_res
== XML_CONVERT_INPUT_INCOMPLETE
))
3178 charDataHandler(parser
->m_handlerArg
, (XML_Char
*)s
,
3179 (int)((XML_Char
*)next
- (XML_Char
*)s
));
3180 } else if (parser
->m_defaultHandler
)
3181 reportDefault(parser
, enc
, s
, next
);
3184 if (! reportProcessingInstruction(parser
, enc
, s
, next
))
3185 return XML_ERROR_NO_MEMORY
;
3187 case XML_TOK_COMMENT
:
3188 if (! reportComment(parser
, enc
, s
, next
))
3189 return XML_ERROR_NO_MEMORY
;
3192 /* All of the tokens produced by XmlContentTok() have their own
3193 * explicit cases, so this default is not strictly necessary.
3194 * However it is a useful safety net, so we retain the code and
3195 * simply exclude it from the coverage tests.
3199 if (parser
->m_defaultHandler
)
3200 reportDefault(parser
, enc
, s
, next
);
3202 /* LCOV_EXCL_STOP */
3204 *eventPP
= s
= next
;
3205 switch (parser
->m_parsingStatus
.parsing
) {
3208 return XML_ERROR_NONE
;
3210 return XML_ERROR_ABORTED
;
3217 /* This function does not call free() on the allocated memory, merely
3218 * moving it to the parser's m_freeBindingList where it can be freed or
3219 * reused as appropriate.
3222 freeBindings(XML_Parser parser
, BINDING
*bindings
) {
3224 BINDING
*b
= bindings
;
3226 /* m_startNamespaceDeclHandler will have been called for this
3227 * binding in addBinding(), so call the end handler now.
3229 if (parser
->m_endNamespaceDeclHandler
)
3230 parser
->m_endNamespaceDeclHandler(parser
->m_handlerArg
, b
->prefix
->name
);
3232 bindings
= bindings
->nextTagBinding
;
3233 b
->nextTagBinding
= parser
->m_freeBindingList
;
3234 parser
->m_freeBindingList
= b
;
3235 b
->prefix
->binding
= b
->prevPrefixBinding
;
3239 /* Precondition: all arguments must be non-NULL;
3241 - normalize attributes
3242 - check attributes for well-formedness
3243 - generate namespace aware attribute names (URI, prefix)
3244 - build list of attributes for startElementHandler
3245 - default attributes
3246 - process namespace declarations (check and report them)
3247 - generate namespace aware element name (URI, prefix)
3249 static enum XML_Error
3250 storeAtts(XML_Parser parser
, const ENCODING
*enc
, const char *attStr
,
3251 TAG_NAME
*tagNamePtr
, BINDING
**bindingsPtr
,
3252 enum XML_Account account
) {
3253 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
3254 ELEMENT_TYPE
*elementType
;
3256 const XML_Char
**appAtts
; /* the attribute list for the application */
3264 const XML_Char
*localPart
;
3266 /* lookup the element type name */
3268 = (ELEMENT_TYPE
*)lookup(parser
, &dtd
->elementTypes
, tagNamePtr
->str
, 0);
3269 if (! elementType
) {
3270 const XML_Char
*name
= poolCopyString(&dtd
->pool
, tagNamePtr
->str
);
3272 return XML_ERROR_NO_MEMORY
;
3273 elementType
= (ELEMENT_TYPE
*)lookup(parser
, &dtd
->elementTypes
, name
,
3274 sizeof(ELEMENT_TYPE
));
3276 return XML_ERROR_NO_MEMORY
;
3277 if (parser
->m_ns
&& ! setElementTypePrefix(parser
, elementType
))
3278 return XML_ERROR_NO_MEMORY
;
3280 nDefaultAtts
= elementType
->nDefaultAtts
;
3282 /* get the attributes from the tokenizer */
3283 n
= XmlGetAttributes(enc
, attStr
, parser
->m_attsSize
, parser
->m_atts
);
3285 /* Detect and prevent integer overflow */
3286 if (n
> INT_MAX
- nDefaultAtts
) {
3287 return XML_ERROR_NO_MEMORY
;
3290 if (n
+ nDefaultAtts
> parser
->m_attsSize
) {
3291 int oldAttsSize
= parser
->m_attsSize
;
3293 #ifdef XML_ATTR_INFO
3294 XML_AttrInfo
*temp2
;
3297 /* Detect and prevent integer overflow */
3298 if ((nDefaultAtts
> INT_MAX
- INIT_ATTS_SIZE
)
3299 || (n
> INT_MAX
- (nDefaultAtts
+ INIT_ATTS_SIZE
))) {
3300 return XML_ERROR_NO_MEMORY
;
3303 parser
->m_attsSize
= n
+ nDefaultAtts
+ INIT_ATTS_SIZE
;
3305 /* Detect and prevent integer overflow.
3306 * The preprocessor guard addresses the "always false" warning
3307 * from -Wtype-limits on platforms where
3308 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3309 #if UINT_MAX >= SIZE_MAX
3310 if ((unsigned)parser
->m_attsSize
> (size_t)(-1) / sizeof(ATTRIBUTE
)) {
3311 parser
->m_attsSize
= oldAttsSize
;
3312 return XML_ERROR_NO_MEMORY
;
3316 temp
= (ATTRIBUTE
*)REALLOC(parser
, (void *)parser
->m_atts
,
3317 parser
->m_attsSize
* sizeof(ATTRIBUTE
));
3319 parser
->m_attsSize
= oldAttsSize
;
3320 return XML_ERROR_NO_MEMORY
;
3322 parser
->m_atts
= temp
;
3323 #ifdef XML_ATTR_INFO
3324 /* Detect and prevent integer overflow.
3325 * The preprocessor guard addresses the "always false" warning
3326 * from -Wtype-limits on platforms where
3327 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3328 # if UINT_MAX >= SIZE_MAX
3329 if ((unsigned)parser
->m_attsSize
> (size_t)(-1) / sizeof(XML_AttrInfo
)) {
3330 parser
->m_attsSize
= oldAttsSize
;
3331 return XML_ERROR_NO_MEMORY
;
3335 temp2
= (XML_AttrInfo
*)REALLOC(parser
, (void *)parser
->m_attInfo
,
3336 parser
->m_attsSize
* sizeof(XML_AttrInfo
));
3337 if (temp2
== NULL
) {
3338 parser
->m_attsSize
= oldAttsSize
;
3339 return XML_ERROR_NO_MEMORY
;
3341 parser
->m_attInfo
= temp2
;
3343 if (n
> oldAttsSize
)
3344 XmlGetAttributes(enc
, attStr
, n
, parser
->m_atts
);
3347 appAtts
= (const XML_Char
**)parser
->m_atts
;
3348 for (i
= 0; i
< n
; i
++) {
3349 ATTRIBUTE
*currAtt
= &parser
->m_atts
[i
];
3350 #ifdef XML_ATTR_INFO
3351 XML_AttrInfo
*currAttInfo
= &parser
->m_attInfo
[i
];
3353 /* add the name and value to the attribute list */
3355 = getAttributeId(parser
, enc
, currAtt
->name
,
3356 currAtt
->name
+ XmlNameLength(enc
, currAtt
->name
));
3358 return XML_ERROR_NO_MEMORY
;
3359 #ifdef XML_ATTR_INFO
3360 currAttInfo
->nameStart
3361 = parser
->m_parseEndByteIndex
- (parser
->m_parseEndPtr
- currAtt
->name
);
3362 currAttInfo
->nameEnd
3363 = currAttInfo
->nameStart
+ XmlNameLength(enc
, currAtt
->name
);
3364 currAttInfo
->valueStart
= parser
->m_parseEndByteIndex
3365 - (parser
->m_parseEndPtr
- currAtt
->valuePtr
);
3366 currAttInfo
->valueEnd
= parser
->m_parseEndByteIndex
3367 - (parser
->m_parseEndPtr
- currAtt
->valueEnd
);
3369 /* Detect duplicate attributes by their QNames. This does not work when
3370 namespace processing is turned on and different prefixes for the same
3371 namespace are used. For this case we have a check further down.
3373 if ((attId
->name
)[-1]) {
3374 if (enc
== parser
->m_encoding
)
3375 parser
->m_eventPtr
= parser
->m_atts
[i
].name
;
3376 return XML_ERROR_DUPLICATE_ATTRIBUTE
;
3378 (attId
->name
)[-1] = 1;
3379 appAtts
[attIndex
++] = attId
->name
;
3380 if (! parser
->m_atts
[i
].normalized
) {
3381 enum XML_Error result
;
3382 XML_Bool isCdata
= XML_TRUE
;
3384 /* figure out whether declared as other than CDATA */
3385 if (attId
->maybeTokenized
) {
3387 for (j
= 0; j
< nDefaultAtts
; j
++) {
3388 if (attId
== elementType
->defaultAtts
[j
].id
) {
3389 isCdata
= elementType
->defaultAtts
[j
].isCdata
;
3395 /* normalize the attribute value */
3396 result
= storeAttributeValue(
3397 parser
, enc
, isCdata
, parser
->m_atts
[i
].valuePtr
,
3398 parser
->m_atts
[i
].valueEnd
, &parser
->m_tempPool
, account
);
3401 appAtts
[attIndex
] = poolStart(&parser
->m_tempPool
);
3402 poolFinish(&parser
->m_tempPool
);
3404 /* the value did not need normalizing */
3405 appAtts
[attIndex
] = poolStoreString(&parser
->m_tempPool
, enc
,
3406 parser
->m_atts
[i
].valuePtr
,
3407 parser
->m_atts
[i
].valueEnd
);
3408 if (appAtts
[attIndex
] == 0)
3409 return XML_ERROR_NO_MEMORY
;
3410 poolFinish(&parser
->m_tempPool
);
3412 /* handle prefixed attribute names */
3413 if (attId
->prefix
) {
3415 /* deal with namespace declarations here */
3416 enum XML_Error result
= addBinding(parser
, attId
->prefix
, attId
,
3417 appAtts
[attIndex
], bindingsPtr
);
3422 /* deal with other prefixed names later */
3425 (attId
->name
)[-1] = 2;
3431 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3432 parser
->m_nSpecifiedAtts
= attIndex
;
3433 if (elementType
->idAtt
&& (elementType
->idAtt
->name
)[-1]) {
3434 for (i
= 0; i
< attIndex
; i
+= 2)
3435 if (appAtts
[i
] == elementType
->idAtt
->name
) {
3436 parser
->m_idAttIndex
= i
;
3440 parser
->m_idAttIndex
= -1;
3442 /* do attribute defaulting */
3443 for (i
= 0; i
< nDefaultAtts
; i
++) {
3444 const DEFAULT_ATTRIBUTE
*da
= elementType
->defaultAtts
+ i
;
3445 if (! (da
->id
->name
)[-1] && da
->value
) {
3446 if (da
->id
->prefix
) {
3447 if (da
->id
->xmlns
) {
3448 enum XML_Error result
= addBinding(parser
, da
->id
->prefix
, da
->id
,
3449 da
->value
, bindingsPtr
);
3453 (da
->id
->name
)[-1] = 2;
3455 appAtts
[attIndex
++] = da
->id
->name
;
3456 appAtts
[attIndex
++] = da
->value
;
3459 (da
->id
->name
)[-1] = 1;
3460 appAtts
[attIndex
++] = da
->id
->name
;
3461 appAtts
[attIndex
++] = da
->value
;
3465 appAtts
[attIndex
] = 0;
3467 /* expand prefixed attribute names, check for duplicates,
3468 and clear flags that say whether attributes were specified */
3471 int j
; /* hash table index */
3472 unsigned long version
= parser
->m_nsAttsVersion
;
3474 /* Detect and prevent invalid shift */
3475 if (parser
->m_nsAttsPower
>= sizeof(unsigned int) * 8 /* bits per byte */) {
3476 return XML_ERROR_NO_MEMORY
;
3479 unsigned int nsAttsSize
= 1u << parser
->m_nsAttsPower
;
3480 unsigned char oldNsAttsPower
= parser
->m_nsAttsPower
;
3481 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3482 if ((nPrefixes
<< 1)
3483 >> parser
->m_nsAttsPower
) { /* true for m_nsAttsPower = 0 */
3485 /* hash table size must also be a power of 2 and >= 8 */
3486 while (nPrefixes
>> parser
->m_nsAttsPower
++)
3488 if (parser
->m_nsAttsPower
< 3)
3489 parser
->m_nsAttsPower
= 3;
3491 /* Detect and prevent invalid shift */
3492 if (parser
->m_nsAttsPower
>= sizeof(nsAttsSize
) * 8 /* bits per byte */) {
3493 /* Restore actual size of memory in m_nsAtts */
3494 parser
->m_nsAttsPower
= oldNsAttsPower
;
3495 return XML_ERROR_NO_MEMORY
;
3498 nsAttsSize
= 1u << parser
->m_nsAttsPower
;
3500 /* Detect and prevent integer overflow.
3501 * The preprocessor guard addresses the "always false" warning
3502 * from -Wtype-limits on platforms where
3503 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3504 #if UINT_MAX >= SIZE_MAX
3505 if (nsAttsSize
> (size_t)(-1) / sizeof(NS_ATT
)) {
3506 /* Restore actual size of memory in m_nsAtts */
3507 parser
->m_nsAttsPower
= oldNsAttsPower
;
3508 return XML_ERROR_NO_MEMORY
;
3512 temp
= (NS_ATT
*)REALLOC(parser
, parser
->m_nsAtts
,
3513 nsAttsSize
* sizeof(NS_ATT
));
3515 /* Restore actual size of memory in m_nsAtts */
3516 parser
->m_nsAttsPower
= oldNsAttsPower
;
3517 return XML_ERROR_NO_MEMORY
;
3519 parser
->m_nsAtts
= temp
;
3520 version
= 0; /* force re-initialization of m_nsAtts hash table */
3522 /* using a version flag saves us from initializing m_nsAtts every time */
3523 if (! version
) { /* initialize version flags when version wraps around */
3524 version
= INIT_ATTS_VERSION
;
3525 for (j
= nsAttsSize
; j
!= 0;)
3526 parser
->m_nsAtts
[--j
].version
= version
;
3528 parser
->m_nsAttsVersion
= --version
;
3530 /* expand prefixed names and check for duplicates */
3531 for (; i
< attIndex
; i
+= 2) {
3532 const XML_Char
*s
= appAtts
[i
];
3533 if (s
[-1] == 2) { /* prefixed */
3536 unsigned long uriHash
;
3537 struct siphash sip_state
;
3538 struct sipkey sip_key
;
3540 copy_salt_to_sipkey(parser
, &sip_key
);
3541 sip24_init(&sip_state
, &sip_key
);
3543 ((XML_Char
*)s
)[-1] = 0; /* clear flag */
3544 id
= (ATTRIBUTE_ID
*)lookup(parser
, &dtd
->attributeIds
, s
, 0);
3545 if (! id
|| ! id
->prefix
) {
3546 /* This code is walking through the appAtts array, dealing
3547 * with (in this case) a prefixed attribute name. To be in
3548 * the array, the attribute must have already been bound, so
3549 * has to have passed through the hash table lookup once
3550 * already. That implies that an entry for it already
3551 * exists, so the lookup above will return a pointer to
3552 * already allocated memory. There is no opportunity for
3553 * the allocator to fail, so the condition above cannot be
3556 * Since it is difficult to be certain that the above
3557 * analysis is complete, we retain the test and merely
3558 * remove the code from coverage tests.
3560 return XML_ERROR_NO_MEMORY
; /* LCOV_EXCL_LINE */
3562 b
= id
->prefix
->binding
;
3564 return XML_ERROR_UNBOUND_PREFIX
;
3566 for (j
= 0; j
< b
->uriLen
; j
++) {
3567 const XML_Char c
= b
->uri
[j
];
3568 if (! poolAppendChar(&parser
->m_tempPool
, c
))
3569 return XML_ERROR_NO_MEMORY
;
3572 sip24_update(&sip_state
, b
->uri
, b
->uriLen
* sizeof(XML_Char
));
3574 while (*s
++ != XML_T(ASCII_COLON
))
3577 sip24_update(&sip_state
, s
, keylen(s
) * sizeof(XML_Char
));
3579 do { /* copies null terminator */
3580 if (! poolAppendChar(&parser
->m_tempPool
, *s
))
3581 return XML_ERROR_NO_MEMORY
;
3584 uriHash
= (unsigned long)sip24_final(&sip_state
);
3586 { /* Check hash table for duplicate of expanded name (uriName).
3587 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3589 unsigned char step
= 0;
3590 unsigned long mask
= nsAttsSize
- 1;
3591 j
= uriHash
& mask
; /* index into hash table */
3592 while (parser
->m_nsAtts
[j
].version
== version
) {
3593 /* for speed we compare stored hash values first */
3594 if (uriHash
== parser
->m_nsAtts
[j
].hash
) {
3595 const XML_Char
*s1
= poolStart(&parser
->m_tempPool
);
3596 const XML_Char
*s2
= parser
->m_nsAtts
[j
].uriName
;
3597 /* s1 is null terminated, but not s2 */
3598 for (; *s1
== *s2
&& *s1
!= 0; s1
++, s2
++)
3601 return XML_ERROR_DUPLICATE_ATTRIBUTE
;
3604 step
= PROBE_STEP(uriHash
, mask
, parser
->m_nsAttsPower
);
3605 j
< step
? (j
+= nsAttsSize
- step
) : (j
-= step
);
3609 if (parser
->m_ns_triplets
) { /* append namespace separator and prefix */
3610 parser
->m_tempPool
.ptr
[-1] = parser
->m_namespaceSeparator
;
3611 s
= b
->prefix
->name
;
3613 if (! poolAppendChar(&parser
->m_tempPool
, *s
))
3614 return XML_ERROR_NO_MEMORY
;
3618 /* store expanded name in attribute list */
3619 s
= poolStart(&parser
->m_tempPool
);
3620 poolFinish(&parser
->m_tempPool
);
3623 /* fill empty slot with new version, uriName and hash value */
3624 parser
->m_nsAtts
[j
].version
= version
;
3625 parser
->m_nsAtts
[j
].hash
= uriHash
;
3626 parser
->m_nsAtts
[j
].uriName
= s
;
3628 if (! --nPrefixes
) {
3632 } else /* not prefixed */
3633 ((XML_Char
*)s
)[-1] = 0; /* clear flag */
3636 /* clear flags for the remaining attributes */
3637 for (; i
< attIndex
; i
+= 2)
3638 ((XML_Char
*)(appAtts
[i
]))[-1] = 0;
3639 for (binding
= *bindingsPtr
; binding
; binding
= binding
->nextTagBinding
)
3640 binding
->attId
->name
[-1] = 0;
3643 return XML_ERROR_NONE
;
3645 /* expand the element type name */
3646 if (elementType
->prefix
) {
3647 binding
= elementType
->prefix
->binding
;
3649 return XML_ERROR_UNBOUND_PREFIX
;
3650 localPart
= tagNamePtr
->str
;
3651 while (*localPart
++ != XML_T(ASCII_COLON
))
3653 } else if (dtd
->defaultPrefix
.binding
) {
3654 binding
= dtd
->defaultPrefix
.binding
;
3655 localPart
= tagNamePtr
->str
;
3657 return XML_ERROR_NONE
;
3659 if (parser
->m_ns_triplets
&& binding
->prefix
->name
) {
3660 for (; binding
->prefix
->name
[prefixLen
++];)
3661 ; /* prefixLen includes null terminator */
3663 tagNamePtr
->localPart
= localPart
;
3664 tagNamePtr
->uriLen
= binding
->uriLen
;
3665 tagNamePtr
->prefix
= binding
->prefix
->name
;
3666 tagNamePtr
->prefixLen
= prefixLen
;
3667 for (i
= 0; localPart
[i
++];)
3668 ; /* i includes null terminator */
3670 /* Detect and prevent integer overflow */
3671 if (binding
->uriLen
> INT_MAX
- prefixLen
3672 || i
> INT_MAX
- (binding
->uriLen
+ prefixLen
)) {
3673 return XML_ERROR_NO_MEMORY
;
3676 n
= i
+ binding
->uriLen
+ prefixLen
;
3677 if (n
> binding
->uriAlloc
) {
3680 /* Detect and prevent integer overflow */
3681 if (n
> INT_MAX
- EXPAND_SPARE
) {
3682 return XML_ERROR_NO_MEMORY
;
3684 /* Detect and prevent integer overflow.
3685 * The preprocessor guard addresses the "always false" warning
3686 * from -Wtype-limits on platforms where
3687 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3688 #if UINT_MAX >= SIZE_MAX
3689 if ((unsigned)(n
+ EXPAND_SPARE
) > (size_t)(-1) / sizeof(XML_Char
)) {
3690 return XML_ERROR_NO_MEMORY
;
3694 uri
= (XML_Char
*)MALLOC(parser
, (n
+ EXPAND_SPARE
) * sizeof(XML_Char
));
3696 return XML_ERROR_NO_MEMORY
;
3697 binding
->uriAlloc
= n
+ EXPAND_SPARE
;
3698 memcpy(uri
, binding
->uri
, binding
->uriLen
* sizeof(XML_Char
));
3699 for (p
= parser
->m_tagStack
; p
; p
= p
->parent
)
3700 if (p
->name
.str
== binding
->uri
)
3702 FREE(parser
, binding
->uri
);
3705 /* if m_namespaceSeparator != '\0' then uri includes it already */
3706 uri
= binding
->uri
+ binding
->uriLen
;
3707 memcpy(uri
, localPart
, i
* sizeof(XML_Char
));
3708 /* we always have a namespace separator between localPart and prefix */
3711 *uri
= parser
->m_namespaceSeparator
; /* replace null terminator */
3712 memcpy(uri
+ 1, binding
->prefix
->name
, prefixLen
* sizeof(XML_Char
));
3714 tagNamePtr
->str
= binding
->uri
;
3715 return XML_ERROR_NONE
;
3719 is_rfc3986_uri_char(XML_Char candidate
) {
3720 // For the RFC 3986 ABNF grammar see
3721 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3723 switch (candidate
) {
3724 // From rule "ALPHA" (uppercase half)
3752 // From rule "ALPHA" (lowercase half)
3780 // From rule "DIGIT"
3792 // From rule "pct-encoded"
3795 // From rule "unreserved"
3801 // From rule "gen-delims"
3810 // From rule "sub-delims"
3829 /* addBinding() overwrites the value of prefix->binding without checking.
3830 Therefore one must keep track of the old value outside of addBinding().
3832 static enum XML_Error
3833 addBinding(XML_Parser parser
, PREFIX
*prefix
, const ATTRIBUTE_ID
*attId
,
3834 const XML_Char
*uri
, BINDING
**bindingsPtr
) {
3835 // "http://www.w3.org/XML/1998/namespace"
3836 static const XML_Char xmlNamespace
[]
3837 = {ASCII_h
, ASCII_t
, ASCII_t
, ASCII_p
, ASCII_COLON
,
3838 ASCII_SLASH
, ASCII_SLASH
, ASCII_w
, ASCII_w
, ASCII_w
,
3839 ASCII_PERIOD
, ASCII_w
, ASCII_3
, ASCII_PERIOD
, ASCII_o
,
3840 ASCII_r
, ASCII_g
, ASCII_SLASH
, ASCII_X
, ASCII_M
,
3841 ASCII_L
, ASCII_SLASH
, ASCII_1
, ASCII_9
, ASCII_9
,
3842 ASCII_8
, ASCII_SLASH
, ASCII_n
, ASCII_a
, ASCII_m
,
3843 ASCII_e
, ASCII_s
, ASCII_p
, ASCII_a
, ASCII_c
,
3845 static const int xmlLen
= (int)sizeof(xmlNamespace
) / sizeof(XML_Char
) - 1;
3846 // "http://www.w3.org/2000/xmlns/"
3847 static const XML_Char xmlnsNamespace
[]
3848 = {ASCII_h
, ASCII_t
, ASCII_t
, ASCII_p
, ASCII_COLON
, ASCII_SLASH
,
3849 ASCII_SLASH
, ASCII_w
, ASCII_w
, ASCII_w
, ASCII_PERIOD
, ASCII_w
,
3850 ASCII_3
, ASCII_PERIOD
, ASCII_o
, ASCII_r
, ASCII_g
, ASCII_SLASH
,
3851 ASCII_2
, ASCII_0
, ASCII_0
, ASCII_0
, ASCII_SLASH
, ASCII_x
,
3852 ASCII_m
, ASCII_l
, ASCII_n
, ASCII_s
, ASCII_SLASH
, '\0'};
3853 static const int xmlnsLen
3854 = (int)sizeof(xmlnsNamespace
) / sizeof(XML_Char
) - 1;
3856 XML_Bool mustBeXML
= XML_FALSE
;
3857 XML_Bool isXML
= XML_TRUE
;
3858 XML_Bool isXMLNS
= XML_TRUE
;
3863 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3864 if (*uri
== XML_T('\0') && prefix
->name
)
3865 return XML_ERROR_UNDECLARING_PREFIX
;
3867 if (prefix
->name
&& prefix
->name
[0] == XML_T(ASCII_x
)
3868 && prefix
->name
[1] == XML_T(ASCII_m
)
3869 && prefix
->name
[2] == XML_T(ASCII_l
)) {
3870 /* Not allowed to bind xmlns */
3871 if (prefix
->name
[3] == XML_T(ASCII_n
) && prefix
->name
[4] == XML_T(ASCII_s
)
3872 && prefix
->name
[5] == XML_T('\0'))
3873 return XML_ERROR_RESERVED_PREFIX_XMLNS
;
3875 if (prefix
->name
[3] == XML_T('\0'))
3876 mustBeXML
= XML_TRUE
;
3879 for (len
= 0; uri
[len
]; len
++) {
3880 if (isXML
&& (len
> xmlLen
|| uri
[len
] != xmlNamespace
[len
]))
3883 if (! mustBeXML
&& isXMLNS
3884 && (len
> xmlnsLen
|| uri
[len
] != xmlnsNamespace
[len
]))
3885 isXMLNS
= XML_FALSE
;
3887 // NOTE: While Expat does not validate namespace URIs against RFC 3986
3888 // today (and is not REQUIRED to do so with regard to the XML 1.0
3889 // namespaces specification) we have to at least make sure that
3890 // the application on top of Expat (that is likely splitting expanded
3891 // element names ("qualified names") of form
3892 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3893 // in its element handler code) cannot be confused by an attacker
3894 // putting additional namespace separator characters into namespace
3895 // declarations. That would be ambiguous and not to be expected.
3897 // While the HTML API docs of function XML_ParserCreateNS have been
3898 // advising against use of a namespace separator character that can
3899 // appear in a URI for >20 years now, some widespread applications
3900 // are using URI characters (':' (colon) in particular) for a
3901 // namespace separator, in practice. To keep these applications
3902 // functional, we only reject namespace URIs containing the
3903 // application-chosen namespace separator if the chosen separator
3904 // is a non-URI character with regard to RFC 3986.
3905 if (parser
->m_ns
&& (uri
[len
] == parser
->m_namespaceSeparator
)
3906 && ! is_rfc3986_uri_char(uri
[len
])) {
3907 return XML_ERROR_SYNTAX
;
3910 isXML
= isXML
&& len
== xmlLen
;
3911 isXMLNS
= isXMLNS
&& len
== xmlnsLen
;
3913 if (mustBeXML
!= isXML
)
3914 return mustBeXML
? XML_ERROR_RESERVED_PREFIX_XML
3915 : XML_ERROR_RESERVED_NAMESPACE_URI
;
3918 return XML_ERROR_RESERVED_NAMESPACE_URI
;
3920 if (parser
->m_namespaceSeparator
)
3922 if (parser
->m_freeBindingList
) {
3923 b
= parser
->m_freeBindingList
;
3924 if (len
> b
->uriAlloc
) {
3925 /* Detect and prevent integer overflow */
3926 if (len
> INT_MAX
- EXPAND_SPARE
) {
3927 return XML_ERROR_NO_MEMORY
;
3930 /* Detect and prevent integer overflow.
3931 * The preprocessor guard addresses the "always false" warning
3932 * from -Wtype-limits on platforms where
3933 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3934 #if UINT_MAX >= SIZE_MAX
3935 if ((unsigned)(len
+ EXPAND_SPARE
) > (size_t)(-1) / sizeof(XML_Char
)) {
3936 return XML_ERROR_NO_MEMORY
;
3940 XML_Char
*temp
= (XML_Char
*)REALLOC(
3941 parser
, b
->uri
, sizeof(XML_Char
) * (len
+ EXPAND_SPARE
));
3943 return XML_ERROR_NO_MEMORY
;
3945 b
->uriAlloc
= len
+ EXPAND_SPARE
;
3947 parser
->m_freeBindingList
= b
->nextTagBinding
;
3949 b
= (BINDING
*)MALLOC(parser
, sizeof(BINDING
));
3951 return XML_ERROR_NO_MEMORY
;
3953 /* Detect and prevent integer overflow */
3954 if (len
> INT_MAX
- EXPAND_SPARE
) {
3955 return XML_ERROR_NO_MEMORY
;
3957 /* Detect and prevent integer overflow.
3958 * The preprocessor guard addresses the "always false" warning
3959 * from -Wtype-limits on platforms where
3960 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3961 #if UINT_MAX >= SIZE_MAX
3962 if ((unsigned)(len
+ EXPAND_SPARE
) > (size_t)(-1) / sizeof(XML_Char
)) {
3963 return XML_ERROR_NO_MEMORY
;
3968 = (XML_Char
*)MALLOC(parser
, sizeof(XML_Char
) * (len
+ EXPAND_SPARE
));
3971 return XML_ERROR_NO_MEMORY
;
3973 b
->uriAlloc
= len
+ EXPAND_SPARE
;
3976 memcpy(b
->uri
, uri
, len
* sizeof(XML_Char
));
3977 if (parser
->m_namespaceSeparator
)
3978 b
->uri
[len
- 1] = parser
->m_namespaceSeparator
;
3981 b
->prevPrefixBinding
= prefix
->binding
;
3982 /* NULL binding when default namespace undeclared */
3983 if (*uri
== XML_T('\0') && prefix
== &parser
->m_dtd
->defaultPrefix
)
3984 prefix
->binding
= NULL
;
3986 prefix
->binding
= b
;
3987 b
->nextTagBinding
= *bindingsPtr
;
3989 /* if attId == NULL then we are not starting a namespace scope */
3990 if (attId
&& parser
->m_startNamespaceDeclHandler
)
3991 parser
->m_startNamespaceDeclHandler(parser
->m_handlerArg
, prefix
->name
,
3992 prefix
->binding
? uri
: 0);
3993 return XML_ERROR_NONE
;
3996 /* The idea here is to avoid using stack for each CDATA section when
3997 the whole file is parsed with one call.
3999 static enum XML_Error PTRCALL
4000 cdataSectionProcessor(XML_Parser parser
, const char *start
, const char *end
,
4001 const char **endPtr
) {
4002 enum XML_Error result
= doCdataSection(
4003 parser
, parser
->m_encoding
, &start
, end
, endPtr
,
4004 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
, XML_ACCOUNT_DIRECT
);
4005 if (result
!= XML_ERROR_NONE
)
4008 if (parser
->m_parentParser
) { /* we are parsing an external entity */
4009 parser
->m_processor
= externalEntityContentProcessor
;
4010 return externalEntityContentProcessor(parser
, start
, end
, endPtr
);
4012 parser
->m_processor
= contentProcessor
;
4013 return contentProcessor(parser
, start
, end
, endPtr
);
4019 /* startPtr gets set to non-null if the section is closed, and to null if
4020 the section is not yet closed.
4022 static enum XML_Error
4023 doCdataSection(XML_Parser parser
, const ENCODING
*enc
, const char **startPtr
,
4024 const char *end
, const char **nextPtr
, XML_Bool haveMore
,
4025 enum XML_Account account
) {
4026 const char *s
= *startPtr
;
4027 const char **eventPP
;
4028 const char **eventEndPP
;
4029 if (enc
== parser
->m_encoding
) {
4030 eventPP
= &parser
->m_eventPtr
;
4032 eventEndPP
= &parser
->m_eventEndPtr
;
4034 eventPP
= &(parser
->m_openInternalEntities
->internalEventPtr
);
4035 eventEndPP
= &(parser
->m_openInternalEntities
->internalEventEndPtr
);
4041 const char *next
= s
; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4042 int tok
= XmlCdataSectionTok(enc
, s
, end
, &next
);
4044 if (! accountingDiffTolerated(parser
, tok
, s
, next
, __LINE__
, account
)) {
4045 accountingOnAbort(parser
);
4046 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
4053 case XML_TOK_CDATA_SECT_CLOSE
:
4054 if (parser
->m_endCdataSectionHandler
)
4055 parser
->m_endCdataSectionHandler(parser
->m_handlerArg
);
4056 /* BEGIN disabled code */
4057 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4058 else if (0 && parser
->m_characterDataHandler
)
4059 parser
->m_characterDataHandler(parser
->m_handlerArg
, parser
->m_dataBuf
,
4061 /* END disabled code */
4062 else if (parser
->m_defaultHandler
)
4063 reportDefault(parser
, enc
, s
, next
);
4066 if (parser
->m_parsingStatus
.parsing
== XML_FINISHED
)
4067 return XML_ERROR_ABORTED
;
4069 return XML_ERROR_NONE
;
4070 case XML_TOK_DATA_NEWLINE
:
4071 if (parser
->m_characterDataHandler
) {
4073 parser
->m_characterDataHandler(parser
->m_handlerArg
, &c
, 1);
4074 } else if (parser
->m_defaultHandler
)
4075 reportDefault(parser
, enc
, s
, next
);
4077 case XML_TOK_DATA_CHARS
: {
4078 XML_CharacterDataHandler charDataHandler
= parser
->m_characterDataHandler
;
4079 if (charDataHandler
) {
4080 if (MUST_CONVERT(enc
, s
)) {
4082 ICHAR
*dataPtr
= (ICHAR
*)parser
->m_dataBuf
;
4083 const enum XML_Convert_Result convert_res
= XmlConvert(
4084 enc
, &s
, next
, &dataPtr
, (ICHAR
*)parser
->m_dataBufEnd
);
4086 charDataHandler(parser
->m_handlerArg
, parser
->m_dataBuf
,
4087 (int)(dataPtr
- (ICHAR
*)parser
->m_dataBuf
));
4088 if ((convert_res
== XML_CONVERT_COMPLETED
)
4089 || (convert_res
== XML_CONVERT_INPUT_INCOMPLETE
))
4094 charDataHandler(parser
->m_handlerArg
, (XML_Char
*)s
,
4095 (int)((XML_Char
*)next
- (XML_Char
*)s
));
4096 } else if (parser
->m_defaultHandler
)
4097 reportDefault(parser
, enc
, s
, next
);
4099 case XML_TOK_INVALID
:
4101 return XML_ERROR_INVALID_TOKEN
;
4102 case XML_TOK_PARTIAL_CHAR
:
4105 return XML_ERROR_NONE
;
4107 return XML_ERROR_PARTIAL_CHAR
;
4108 case XML_TOK_PARTIAL
:
4112 return XML_ERROR_NONE
;
4114 return XML_ERROR_UNCLOSED_CDATA_SECTION
;
4116 /* Every token returned by XmlCdataSectionTok() has its own
4117 * explicit case, so this default case will never be executed.
4118 * We retain it as a safety net and exclude it from the coverage
4124 return XML_ERROR_UNEXPECTED_STATE
;
4125 /* LCOV_EXCL_STOP */
4128 *eventPP
= s
= next
;
4129 switch (parser
->m_parsingStatus
.parsing
) {
4132 return XML_ERROR_NONE
;
4134 return XML_ERROR_ABORTED
;
4143 /* The idea here is to avoid using stack for each IGNORE section when
4144 the whole file is parsed with one call.
4146 static enum XML_Error PTRCALL
4147 ignoreSectionProcessor(XML_Parser parser
, const char *start
, const char *end
,
4148 const char **endPtr
) {
4149 enum XML_Error result
4150 = doIgnoreSection(parser
, parser
->m_encoding
, &start
, end
, endPtr
,
4151 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
);
4152 if (result
!= XML_ERROR_NONE
)
4155 parser
->m_processor
= prologProcessor
;
4156 return prologProcessor(parser
, start
, end
, endPtr
);
4161 /* startPtr gets set to non-null if the section is closed, and to null
4162 if the section is not yet closed.
4164 static enum XML_Error
4165 doIgnoreSection(XML_Parser parser
, const ENCODING
*enc
, const char **startPtr
,
4166 const char *end
, const char **nextPtr
, XML_Bool haveMore
) {
4167 const char *next
= *startPtr
; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4169 const char *s
= *startPtr
;
4170 const char **eventPP
;
4171 const char **eventEndPP
;
4172 if (enc
== parser
->m_encoding
) {
4173 eventPP
= &parser
->m_eventPtr
;
4175 eventEndPP
= &parser
->m_eventEndPtr
;
4177 /* It's not entirely clear, but it seems the following two lines
4178 * of code cannot be executed. The only occasions on which 'enc'
4179 * is not 'encoding' are when this function is called
4180 * from the internal entity processing, and IGNORE sections are an
4181 * error in internal entities.
4183 * Since it really isn't clear that this is true, we keep the code
4184 * and just remove it from our coverage tests.
4188 eventPP
= &(parser
->m_openInternalEntities
->internalEventPtr
);
4189 eventEndPP
= &(parser
->m_openInternalEntities
->internalEventEndPtr
);
4190 /* LCOV_EXCL_STOP */
4194 tok
= XmlIgnoreSectionTok(enc
, s
, end
, &next
);
4196 if (! accountingDiffTolerated(parser
, tok
, s
, next
, __LINE__
,
4197 XML_ACCOUNT_DIRECT
)) {
4198 accountingOnAbort(parser
);
4199 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
4204 case XML_TOK_IGNORE_SECT
:
4205 if (parser
->m_defaultHandler
)
4206 reportDefault(parser
, enc
, s
, next
);
4209 if (parser
->m_parsingStatus
.parsing
== XML_FINISHED
)
4210 return XML_ERROR_ABORTED
;
4212 return XML_ERROR_NONE
;
4213 case XML_TOK_INVALID
:
4215 return XML_ERROR_INVALID_TOKEN
;
4216 case XML_TOK_PARTIAL_CHAR
:
4219 return XML_ERROR_NONE
;
4221 return XML_ERROR_PARTIAL_CHAR
;
4222 case XML_TOK_PARTIAL
:
4226 return XML_ERROR_NONE
;
4228 return XML_ERROR_SYNTAX
; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4230 /* All of the tokens that XmlIgnoreSectionTok() returns have
4231 * explicit cases to handle them, so this default case is never
4232 * executed. We keep it as a safety net anyway, and remove it
4233 * from our test coverage statistics.
4238 return XML_ERROR_UNEXPECTED_STATE
;
4239 /* LCOV_EXCL_STOP */
4244 #endif /* XML_DTD */
4246 static enum XML_Error
4247 initializeEncoding(XML_Parser parser
) {
4250 char encodingBuf
[128];
4251 /* See comments about `protocolEncodingName` in parserInit() */
4252 if (! parser
->m_protocolEncodingName
)
4256 for (i
= 0; parser
->m_protocolEncodingName
[i
]; i
++) {
4257 if (i
== sizeof(encodingBuf
) - 1
4258 || (parser
->m_protocolEncodingName
[i
] & ~0x7f) != 0) {
4259 encodingBuf
[0] = '\0';
4262 encodingBuf
[i
] = (char)parser
->m_protocolEncodingName
[i
];
4264 encodingBuf
[i
] = '\0';
4268 s
= parser
->m_protocolEncodingName
;
4270 if ((parser
->m_ns
? XmlInitEncodingNS
: XmlInitEncoding
)(
4271 &parser
->m_initEncoding
, &parser
->m_encoding
, s
))
4272 return XML_ERROR_NONE
;
4273 return handleUnknownEncoding(parser
, parser
->m_protocolEncodingName
);
4276 static enum XML_Error
4277 processXmlDecl(XML_Parser parser
, int isGeneralTextEntity
, const char *s
,
4279 const char *encodingName
= NULL
;
4280 const XML_Char
*storedEncName
= NULL
;
4281 const ENCODING
*newEncoding
= NULL
;
4282 const char *version
= NULL
;
4283 const char *versionend
= NULL
;
4284 const XML_Char
*storedversion
= NULL
;
4285 int standalone
= -1;
4288 if (! accountingDiffTolerated(parser
, XML_TOK_XML_DECL
, s
, next
, __LINE__
,
4289 XML_ACCOUNT_DIRECT
)) {
4290 accountingOnAbort(parser
);
4291 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
4295 if (! (parser
->m_ns
? XmlParseXmlDeclNS
: XmlParseXmlDecl
)(
4296 isGeneralTextEntity
, parser
->m_encoding
, s
, next
, &parser
->m_eventPtr
,
4297 &version
, &versionend
, &encodingName
, &newEncoding
, &standalone
)) {
4298 if (isGeneralTextEntity
)
4299 return XML_ERROR_TEXT_DECL
;
4301 return XML_ERROR_XML_DECL
;
4303 if (! isGeneralTextEntity
&& standalone
== 1) {
4304 parser
->m_dtd
->standalone
= XML_TRUE
;
4306 if (parser
->m_paramEntityParsing
4307 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
)
4308 parser
->m_paramEntityParsing
= XML_PARAM_ENTITY_PARSING_NEVER
;
4309 #endif /* XML_DTD */
4311 if (parser
->m_xmlDeclHandler
) {
4312 if (encodingName
!= NULL
) {
4313 storedEncName
= poolStoreString(
4314 &parser
->m_temp2Pool
, parser
->m_encoding
, encodingName
,
4315 encodingName
+ XmlNameLength(parser
->m_encoding
, encodingName
));
4316 if (! storedEncName
)
4317 return XML_ERROR_NO_MEMORY
;
4318 poolFinish(&parser
->m_temp2Pool
);
4322 = poolStoreString(&parser
->m_temp2Pool
, parser
->m_encoding
, version
,
4323 versionend
- parser
->m_encoding
->minBytesPerChar
);
4324 if (! storedversion
)
4325 return XML_ERROR_NO_MEMORY
;
4327 parser
->m_xmlDeclHandler(parser
->m_handlerArg
, storedversion
, storedEncName
,
4329 } else if (parser
->m_defaultHandler
)
4330 reportDefault(parser
, parser
->m_encoding
, s
, next
);
4331 if (parser
->m_protocolEncodingName
== NULL
) {
4333 /* Check that the specified encoding does not conflict with what
4334 * the parser has already deduced. Do we have the same number
4335 * of bytes in the smallest representation of a character? If
4336 * this is UTF-16, is it the same endianness?
4338 if (newEncoding
->minBytesPerChar
!= parser
->m_encoding
->minBytesPerChar
4339 || (newEncoding
->minBytesPerChar
== 2
4340 && newEncoding
!= parser
->m_encoding
)) {
4341 parser
->m_eventPtr
= encodingName
;
4342 return XML_ERROR_INCORRECT_ENCODING
;
4344 parser
->m_encoding
= newEncoding
;
4345 } else if (encodingName
) {
4346 enum XML_Error result
;
4347 if (! storedEncName
) {
4348 storedEncName
= poolStoreString(
4349 &parser
->m_temp2Pool
, parser
->m_encoding
, encodingName
,
4350 encodingName
+ XmlNameLength(parser
->m_encoding
, encodingName
));
4351 if (! storedEncName
)
4352 return XML_ERROR_NO_MEMORY
;
4354 result
= handleUnknownEncoding(parser
, storedEncName
);
4355 poolClear(&parser
->m_temp2Pool
);
4356 if (result
== XML_ERROR_UNKNOWN_ENCODING
)
4357 parser
->m_eventPtr
= encodingName
;
4362 if (storedEncName
|| storedversion
)
4363 poolClear(&parser
->m_temp2Pool
);
4365 return XML_ERROR_NONE
;
4368 static enum XML_Error
4369 handleUnknownEncoding(XML_Parser parser
, const XML_Char
*encodingName
) {
4370 if (parser
->m_unknownEncodingHandler
) {
4373 for (i
= 0; i
< 256; i
++)
4375 info
.convert
= NULL
;
4377 info
.release
= NULL
;
4378 if (parser
->m_unknownEncodingHandler(parser
->m_unknownEncodingHandlerData
,
4379 encodingName
, &info
)) {
4381 parser
->m_unknownEncodingMem
= MALLOC(parser
, XmlSizeOfUnknownEncoding());
4382 if (! parser
->m_unknownEncodingMem
) {
4384 info
.release(info
.data
);
4385 return XML_ERROR_NO_MEMORY
;
4387 enc
= (parser
->m_ns
? XmlInitUnknownEncodingNS
: XmlInitUnknownEncoding
)(
4388 parser
->m_unknownEncodingMem
, info
.map
, info
.convert
, info
.data
);
4390 parser
->m_unknownEncodingData
= info
.data
;
4391 parser
->m_unknownEncodingRelease
= info
.release
;
4392 parser
->m_encoding
= enc
;
4393 return XML_ERROR_NONE
;
4396 if (info
.release
!= NULL
)
4397 info
.release(info
.data
);
4399 return XML_ERROR_UNKNOWN_ENCODING
;
4402 static enum XML_Error PTRCALL
4403 prologInitProcessor(XML_Parser parser
, const char *s
, const char *end
,
4404 const char **nextPtr
) {
4405 enum XML_Error result
= initializeEncoding(parser
);
4406 if (result
!= XML_ERROR_NONE
)
4408 parser
->m_processor
= prologProcessor
;
4409 return prologProcessor(parser
, s
, end
, nextPtr
);
4414 static enum XML_Error PTRCALL
4415 externalParEntInitProcessor(XML_Parser parser
, const char *s
, const char *end
,
4416 const char **nextPtr
) {
4417 enum XML_Error result
= initializeEncoding(parser
);
4418 if (result
!= XML_ERROR_NONE
)
4421 /* we know now that XML_Parse(Buffer) has been called,
4422 so we consider the external parameter entity read */
4423 parser
->m_dtd
->paramEntityRead
= XML_TRUE
;
4425 if (parser
->m_prologState
.inEntityValue
) {
4426 parser
->m_processor
= entityValueInitProcessor
;
4427 return entityValueInitProcessor(parser
, s
, end
, nextPtr
);
4429 parser
->m_processor
= externalParEntProcessor
;
4430 return externalParEntProcessor(parser
, s
, end
, nextPtr
);
4434 static enum XML_Error PTRCALL
4435 entityValueInitProcessor(XML_Parser parser
, const char *s
, const char *end
,
4436 const char **nextPtr
) {
4438 const char *start
= s
;
4439 const char *next
= start
;
4440 parser
->m_eventPtr
= start
;
4443 tok
= XmlPrologTok(parser
->m_encoding
, start
, end
, &next
);
4444 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4448 parser
->m_eventEndPtr
= next
;
4450 if (! parser
->m_parsingStatus
.finalBuffer
&& tok
!= XML_TOK_INVALID
) {
4452 return XML_ERROR_NONE
;
4455 case XML_TOK_INVALID
:
4456 return XML_ERROR_INVALID_TOKEN
;
4457 case XML_TOK_PARTIAL
:
4458 return XML_ERROR_UNCLOSED_TOKEN
;
4459 case XML_TOK_PARTIAL_CHAR
:
4460 return XML_ERROR_PARTIAL_CHAR
;
4461 case XML_TOK_NONE
: /* start == end */
4465 /* found end of entity value - can store it now */
4466 return storeEntityValue(parser
, parser
->m_encoding
, s
, end
,
4467 XML_ACCOUNT_DIRECT
);
4468 } else if (tok
== XML_TOK_XML_DECL
) {
4469 enum XML_Error result
;
4470 result
= processXmlDecl(parser
, 0, start
, next
);
4471 if (result
!= XML_ERROR_NONE
)
4473 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4474 * that to happen, a parameter entity parsing handler must have attempted
4475 * to suspend the parser, which fails and raises an error. The parser can
4476 * be aborted, but can't be suspended.
4478 if (parser
->m_parsingStatus
.parsing
== XML_FINISHED
)
4479 return XML_ERROR_ABORTED
;
4481 /* stop scanning for text declaration - we found one */
4482 parser
->m_processor
= entityValueProcessor
;
4483 return entityValueProcessor(parser
, next
, end
, nextPtr
);
4485 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4486 return XML_TOK_NONE on the next call, which would then cause the
4487 function to exit with *nextPtr set to s - that is what we want for other
4488 tokens, but not for the BOM - we would rather like to skip it;
4489 then, when this routine is entered the next time, XmlPrologTok will
4490 return XML_TOK_INVALID, since the BOM is still in the buffer
4492 else if (tok
== XML_TOK_BOM
&& next
== end
4493 && ! parser
->m_parsingStatus
.finalBuffer
) {
4495 if (! accountingDiffTolerated(parser
, tok
, s
, next
, __LINE__
,
4496 XML_ACCOUNT_DIRECT
)) {
4497 accountingOnAbort(parser
);
4498 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
4503 return XML_ERROR_NONE
;
4505 /* If we get this token, we have the start of what might be a
4506 normal tag, but not a declaration (i.e. it doesn't begin with
4507 "<!"). In a DTD context, that isn't legal.
4509 else if (tok
== XML_TOK_INSTANCE_START
) {
4511 return XML_ERROR_SYNTAX
;
4514 parser
->m_eventPtr
= start
;
4518 static enum XML_Error PTRCALL
4519 externalParEntProcessor(XML_Parser parser
, const char *s
, const char *end
,
4520 const char **nextPtr
) {
4521 const char *next
= s
;
4524 tok
= XmlPrologTok(parser
->m_encoding
, s
, end
, &next
);
4526 if (! parser
->m_parsingStatus
.finalBuffer
&& tok
!= XML_TOK_INVALID
) {
4528 return XML_ERROR_NONE
;
4531 case XML_TOK_INVALID
:
4532 return XML_ERROR_INVALID_TOKEN
;
4533 case XML_TOK_PARTIAL
:
4534 return XML_ERROR_UNCLOSED_TOKEN
;
4535 case XML_TOK_PARTIAL_CHAR
:
4536 return XML_ERROR_PARTIAL_CHAR
;
4537 case XML_TOK_NONE
: /* start == end */
4542 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4543 However, when parsing an external subset, doProlog will not accept a BOM
4544 as valid, and report a syntax error, so we have to skip the BOM, and
4545 account for the BOM bytes.
4547 else if (tok
== XML_TOK_BOM
) {
4548 if (! accountingDiffTolerated(parser
, tok
, s
, next
, __LINE__
,
4549 XML_ACCOUNT_DIRECT
)) {
4550 accountingOnAbort(parser
);
4551 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
4555 tok
= XmlPrologTok(parser
->m_encoding
, s
, end
, &next
);
4558 parser
->m_processor
= prologProcessor
;
4559 return doProlog(parser
, parser
->m_encoding
, s
, end
, tok
, next
, nextPtr
,
4560 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
, XML_TRUE
,
4561 XML_ACCOUNT_DIRECT
);
4564 static enum XML_Error PTRCALL
4565 entityValueProcessor(XML_Parser parser
, const char *s
, const char *end
,
4566 const char **nextPtr
) {
4567 const char *start
= s
;
4568 const char *next
= s
;
4569 const ENCODING
*enc
= parser
->m_encoding
;
4573 tok
= XmlPrologTok(enc
, start
, end
, &next
);
4574 /* Note: These bytes are accounted later in:
4578 if (! parser
->m_parsingStatus
.finalBuffer
&& tok
!= XML_TOK_INVALID
) {
4580 return XML_ERROR_NONE
;
4583 case XML_TOK_INVALID
:
4584 return XML_ERROR_INVALID_TOKEN
;
4585 case XML_TOK_PARTIAL
:
4586 return XML_ERROR_UNCLOSED_TOKEN
;
4587 case XML_TOK_PARTIAL_CHAR
:
4588 return XML_ERROR_PARTIAL_CHAR
;
4589 case XML_TOK_NONE
: /* start == end */
4593 /* found end of entity value - can store it now */
4594 return storeEntityValue(parser
, enc
, s
, end
, XML_ACCOUNT_DIRECT
);
4600 #endif /* XML_DTD */
4602 static enum XML_Error PTRCALL
4603 prologProcessor(XML_Parser parser
, const char *s
, const char *end
,
4604 const char **nextPtr
) {
4605 const char *next
= s
;
4606 int tok
= XmlPrologTok(parser
->m_encoding
, s
, end
, &next
);
4607 return doProlog(parser
, parser
->m_encoding
, s
, end
, tok
, next
, nextPtr
,
4608 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
, XML_TRUE
,
4609 XML_ACCOUNT_DIRECT
);
4612 static enum XML_Error
4613 doProlog(XML_Parser parser
, const ENCODING
*enc
, const char *s
, const char *end
,
4614 int tok
, const char *next
, const char **nextPtr
, XML_Bool haveMore
,
4615 XML_Bool allowClosingDoctype
, enum XML_Account account
) {
4617 static const XML_Char externalSubsetName
[] = {ASCII_HASH
, '\0'};
4618 #endif /* XML_DTD */
4619 static const XML_Char atypeCDATA
[]
4620 = {ASCII_C
, ASCII_D
, ASCII_A
, ASCII_T
, ASCII_A
, '\0'};
4621 static const XML_Char atypeID
[] = {ASCII_I
, ASCII_D
, '\0'};
4622 static const XML_Char atypeIDREF
[]
4623 = {ASCII_I
, ASCII_D
, ASCII_R
, ASCII_E
, ASCII_F
, '\0'};
4624 static const XML_Char atypeIDREFS
[]
4625 = {ASCII_I
, ASCII_D
, ASCII_R
, ASCII_E
, ASCII_F
, ASCII_S
, '\0'};
4626 static const XML_Char atypeENTITY
[]
4627 = {ASCII_E
, ASCII_N
, ASCII_T
, ASCII_I
, ASCII_T
, ASCII_Y
, '\0'};
4628 static const XML_Char atypeENTITIES
[]
4629 = {ASCII_E
, ASCII_N
, ASCII_T
, ASCII_I
, ASCII_T
,
4630 ASCII_I
, ASCII_E
, ASCII_S
, '\0'};
4631 static const XML_Char atypeNMTOKEN
[]
4632 = {ASCII_N
, ASCII_M
, ASCII_T
, ASCII_O
, ASCII_K
, ASCII_E
, ASCII_N
, '\0'};
4633 static const XML_Char atypeNMTOKENS
[]
4634 = {ASCII_N
, ASCII_M
, ASCII_T
, ASCII_O
, ASCII_K
,
4635 ASCII_E
, ASCII_N
, ASCII_S
, '\0'};
4636 static const XML_Char notationPrefix
[]
4637 = {ASCII_N
, ASCII_O
, ASCII_T
, ASCII_A
, ASCII_T
,
4638 ASCII_I
, ASCII_O
, ASCII_N
, ASCII_LPAREN
, '\0'};
4639 static const XML_Char enumValueSep
[] = {ASCII_PIPE
, '\0'};
4640 static const XML_Char enumValueStart
[] = {ASCII_LPAREN
, '\0'};
4646 /* save one level of indirection */
4647 DTD
*const dtd
= parser
->m_dtd
;
4649 const char **eventPP
;
4650 const char **eventEndPP
;
4651 enum XML_Content_Quant quant
;
4653 if (enc
== parser
->m_encoding
) {
4654 eventPP
= &parser
->m_eventPtr
;
4655 eventEndPP
= &parser
->m_eventEndPtr
;
4657 eventPP
= &(parser
->m_openInternalEntities
->internalEventPtr
);
4658 eventEndPP
= &(parser
->m_openInternalEntities
->internalEventEndPtr
);
4663 XML_Bool handleDefault
= XML_TRUE
;
4667 if (haveMore
&& tok
!= XML_TOK_INVALID
) {
4669 return XML_ERROR_NONE
;
4672 case XML_TOK_INVALID
:
4674 return XML_ERROR_INVALID_TOKEN
;
4675 case XML_TOK_PARTIAL
:
4676 return XML_ERROR_UNCLOSED_TOKEN
;
4677 case XML_TOK_PARTIAL_CHAR
:
4678 return XML_ERROR_PARTIAL_CHAR
;
4679 case -XML_TOK_PROLOG_S
:
4684 /* for internal PE NOT referenced between declarations */
4685 if (enc
!= parser
->m_encoding
4686 && ! parser
->m_openInternalEntities
->betweenDecl
) {
4688 return XML_ERROR_NONE
;
4690 /* WFC: PE Between Declarations - must check that PE contains
4691 complete markup, not only for external PEs, but also for
4692 internal PEs if the reference occurs between declarations.
4694 if (parser
->m_isParamEntity
|| enc
!= parser
->m_encoding
) {
4695 if (XmlTokenRole(&parser
->m_prologState
, XML_TOK_NONE
, end
, end
, enc
)
4697 return XML_ERROR_INCOMPLETE_PE
;
4699 return XML_ERROR_NONE
;
4701 #endif /* XML_DTD */
4702 return XML_ERROR_NO_ELEMENTS
;
4709 role
= XmlTokenRole(&parser
->m_prologState
, tok
, s
, next
, enc
);
4712 case XML_ROLE_INSTANCE_START
: // bytes accounted in contentProcessor
4713 case XML_ROLE_XML_DECL
: // bytes accounted in processXmlDecl
4714 case XML_ROLE_TEXT_DECL
: // bytes accounted in processXmlDecl
4717 if (! accountingDiffTolerated(parser
, tok
, s
, next
, __LINE__
, account
)) {
4718 accountingOnAbort(parser
);
4719 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
4724 case XML_ROLE_XML_DECL
: {
4725 enum XML_Error result
= processXmlDecl(parser
, 0, s
, next
);
4726 if (result
!= XML_ERROR_NONE
)
4728 enc
= parser
->m_encoding
;
4729 handleDefault
= XML_FALSE
;
4731 case XML_ROLE_DOCTYPE_NAME
:
4732 if (parser
->m_startDoctypeDeclHandler
) {
4733 parser
->m_doctypeName
4734 = poolStoreString(&parser
->m_tempPool
, enc
, s
, next
);
4735 if (! parser
->m_doctypeName
)
4736 return XML_ERROR_NO_MEMORY
;
4737 poolFinish(&parser
->m_tempPool
);
4738 parser
->m_doctypePubid
= NULL
;
4739 handleDefault
= XML_FALSE
;
4741 parser
->m_doctypeSysid
= NULL
; /* always initialize to NULL */
4743 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET
:
4744 if (parser
->m_startDoctypeDeclHandler
) {
4745 parser
->m_startDoctypeDeclHandler(
4746 parser
->m_handlerArg
, parser
->m_doctypeName
, parser
->m_doctypeSysid
,
4747 parser
->m_doctypePubid
, 1);
4748 parser
->m_doctypeName
= NULL
;
4749 poolClear(&parser
->m_tempPool
);
4750 handleDefault
= XML_FALSE
;
4754 case XML_ROLE_TEXT_DECL
: {
4755 enum XML_Error result
= processXmlDecl(parser
, 1, s
, next
);
4756 if (result
!= XML_ERROR_NONE
)
4758 enc
= parser
->m_encoding
;
4759 handleDefault
= XML_FALSE
;
4761 #endif /* XML_DTD */
4762 case XML_ROLE_DOCTYPE_PUBLIC_ID
:
4764 parser
->m_useForeignDTD
= XML_FALSE
;
4765 parser
->m_declEntity
= (ENTITY
*)lookup(
4766 parser
, &dtd
->paramEntities
, externalSubsetName
, sizeof(ENTITY
));
4767 if (! parser
->m_declEntity
)
4768 return XML_ERROR_NO_MEMORY
;
4769 #endif /* XML_DTD */
4770 dtd
->hasParamEntityRefs
= XML_TRUE
;
4771 if (parser
->m_startDoctypeDeclHandler
) {
4773 if (! XmlIsPublicId(enc
, s
, next
, eventPP
))
4774 return XML_ERROR_PUBLICID
;
4775 pubId
= poolStoreString(&parser
->m_tempPool
, enc
,
4776 s
+ enc
->minBytesPerChar
,
4777 next
- enc
->minBytesPerChar
);
4779 return XML_ERROR_NO_MEMORY
;
4780 normalizePublicId(pubId
);
4781 poolFinish(&parser
->m_tempPool
);
4782 parser
->m_doctypePubid
= pubId
;
4783 handleDefault
= XML_FALSE
;
4784 goto alreadyChecked
;
4787 case XML_ROLE_ENTITY_PUBLIC_ID
:
4788 if (! XmlIsPublicId(enc
, s
, next
, eventPP
))
4789 return XML_ERROR_PUBLICID
;
4791 if (dtd
->keepProcessing
&& parser
->m_declEntity
) {
4793 = poolStoreString(&dtd
->pool
, enc
, s
+ enc
->minBytesPerChar
,
4794 next
- enc
->minBytesPerChar
);
4796 return XML_ERROR_NO_MEMORY
;
4797 normalizePublicId(tem
);
4798 parser
->m_declEntity
->publicId
= tem
;
4799 poolFinish(&dtd
->pool
);
4800 /* Don't suppress the default handler if we fell through from
4801 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4803 if (parser
->m_entityDeclHandler
&& role
== XML_ROLE_ENTITY_PUBLIC_ID
)
4804 handleDefault
= XML_FALSE
;
4807 case XML_ROLE_DOCTYPE_CLOSE
:
4808 if (allowClosingDoctype
!= XML_TRUE
) {
4809 /* Must not close doctype from within expanded parameter entities */
4810 return XML_ERROR_INVALID_TOKEN
;
4813 if (parser
->m_doctypeName
) {
4814 parser
->m_startDoctypeDeclHandler(
4815 parser
->m_handlerArg
, parser
->m_doctypeName
, parser
->m_doctypeSysid
,
4816 parser
->m_doctypePubid
, 0);
4817 poolClear(&parser
->m_tempPool
);
4818 handleDefault
= XML_FALSE
;
4820 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4821 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4822 was not set, indicating an external subset
4825 if (parser
->m_doctypeSysid
|| parser
->m_useForeignDTD
) {
4826 XML_Bool hadParamEntityRefs
= dtd
->hasParamEntityRefs
;
4827 dtd
->hasParamEntityRefs
= XML_TRUE
;
4828 if (parser
->m_paramEntityParsing
4829 && parser
->m_externalEntityRefHandler
) {
4830 ENTITY
*entity
= (ENTITY
*)lookup(parser
, &dtd
->paramEntities
,
4831 externalSubsetName
, sizeof(ENTITY
));
4833 /* The external subset name "#" will have already been
4834 * inserted into the hash table at the start of the
4835 * external entity parsing, so no allocation will happen
4836 * and lookup() cannot fail.
4838 return XML_ERROR_NO_MEMORY
; /* LCOV_EXCL_LINE */
4840 if (parser
->m_useForeignDTD
)
4841 entity
->base
= parser
->m_curBase
;
4842 dtd
->paramEntityRead
= XML_FALSE
;
4843 if (! parser
->m_externalEntityRefHandler(
4844 parser
->m_externalEntityRefHandlerArg
, 0, entity
->base
,
4845 entity
->systemId
, entity
->publicId
))
4846 return XML_ERROR_EXTERNAL_ENTITY_HANDLING
;
4847 if (dtd
->paramEntityRead
) {
4848 if (! dtd
->standalone
&& parser
->m_notStandaloneHandler
4849 && ! parser
->m_notStandaloneHandler(parser
->m_handlerArg
))
4850 return XML_ERROR_NOT_STANDALONE
;
4852 /* if we didn't read the foreign DTD then this means that there
4853 is no external subset and we must reset dtd->hasParamEntityRefs
4855 else if (! parser
->m_doctypeSysid
)
4856 dtd
->hasParamEntityRefs
= hadParamEntityRefs
;
4857 /* end of DTD - no need to update dtd->keepProcessing */
4859 parser
->m_useForeignDTD
= XML_FALSE
;
4861 #endif /* XML_DTD */
4862 if (parser
->m_endDoctypeDeclHandler
) {
4863 parser
->m_endDoctypeDeclHandler(parser
->m_handlerArg
);
4864 handleDefault
= XML_FALSE
;
4867 case XML_ROLE_INSTANCE_START
:
4869 /* if there is no DOCTYPE declaration then now is the
4870 last chance to read the foreign DTD
4872 if (parser
->m_useForeignDTD
) {
4873 XML_Bool hadParamEntityRefs
= dtd
->hasParamEntityRefs
;
4874 dtd
->hasParamEntityRefs
= XML_TRUE
;
4875 if (parser
->m_paramEntityParsing
4876 && parser
->m_externalEntityRefHandler
) {
4877 ENTITY
*entity
= (ENTITY
*)lookup(parser
, &dtd
->paramEntities
,
4878 externalSubsetName
, sizeof(ENTITY
));
4880 return XML_ERROR_NO_MEMORY
;
4881 entity
->base
= parser
->m_curBase
;
4882 dtd
->paramEntityRead
= XML_FALSE
;
4883 if (! parser
->m_externalEntityRefHandler(
4884 parser
->m_externalEntityRefHandlerArg
, 0, entity
->base
,
4885 entity
->systemId
, entity
->publicId
))
4886 return XML_ERROR_EXTERNAL_ENTITY_HANDLING
;
4887 if (dtd
->paramEntityRead
) {
4888 if (! dtd
->standalone
&& parser
->m_notStandaloneHandler
4889 && ! parser
->m_notStandaloneHandler(parser
->m_handlerArg
))
4890 return XML_ERROR_NOT_STANDALONE
;
4892 /* if we didn't read the foreign DTD then this means that there
4893 is no external subset and we must reset dtd->hasParamEntityRefs
4896 dtd
->hasParamEntityRefs
= hadParamEntityRefs
;
4897 /* end of DTD - no need to update dtd->keepProcessing */
4900 #endif /* XML_DTD */
4901 parser
->m_processor
= contentProcessor
;
4902 return contentProcessor(parser
, s
, end
, nextPtr
);
4903 case XML_ROLE_ATTLIST_ELEMENT_NAME
:
4904 parser
->m_declElementType
= getElementType(parser
, enc
, s
, next
);
4905 if (! parser
->m_declElementType
)
4906 return XML_ERROR_NO_MEMORY
;
4907 goto checkAttListDeclHandler
;
4908 case XML_ROLE_ATTRIBUTE_NAME
:
4909 parser
->m_declAttributeId
= getAttributeId(parser
, enc
, s
, next
);
4910 if (! parser
->m_declAttributeId
)
4911 return XML_ERROR_NO_MEMORY
;
4912 parser
->m_declAttributeIsCdata
= XML_FALSE
;
4913 parser
->m_declAttributeType
= NULL
;
4914 parser
->m_declAttributeIsId
= XML_FALSE
;
4915 goto checkAttListDeclHandler
;
4916 case XML_ROLE_ATTRIBUTE_TYPE_CDATA
:
4917 parser
->m_declAttributeIsCdata
= XML_TRUE
;
4918 parser
->m_declAttributeType
= atypeCDATA
;
4919 goto checkAttListDeclHandler
;
4920 case XML_ROLE_ATTRIBUTE_TYPE_ID
:
4921 parser
->m_declAttributeIsId
= XML_TRUE
;
4922 parser
->m_declAttributeType
= atypeID
;
4923 goto checkAttListDeclHandler
;
4924 case XML_ROLE_ATTRIBUTE_TYPE_IDREF
:
4925 parser
->m_declAttributeType
= atypeIDREF
;
4926 goto checkAttListDeclHandler
;
4927 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS
:
4928 parser
->m_declAttributeType
= atypeIDREFS
;
4929 goto checkAttListDeclHandler
;
4930 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY
:
4931 parser
->m_declAttributeType
= atypeENTITY
;
4932 goto checkAttListDeclHandler
;
4933 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES
:
4934 parser
->m_declAttributeType
= atypeENTITIES
;
4935 goto checkAttListDeclHandler
;
4936 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN
:
4937 parser
->m_declAttributeType
= atypeNMTOKEN
;
4938 goto checkAttListDeclHandler
;
4939 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS
:
4940 parser
->m_declAttributeType
= atypeNMTOKENS
;
4941 checkAttListDeclHandler
:
4942 if (dtd
->keepProcessing
&& parser
->m_attlistDeclHandler
)
4943 handleDefault
= XML_FALSE
;
4945 case XML_ROLE_ATTRIBUTE_ENUM_VALUE
:
4946 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE
:
4947 if (dtd
->keepProcessing
&& parser
->m_attlistDeclHandler
) {
4948 const XML_Char
*prefix
;
4949 if (parser
->m_declAttributeType
) {
4950 prefix
= enumValueSep
;
4952 prefix
= (role
== XML_ROLE_ATTRIBUTE_NOTATION_VALUE
? notationPrefix
4955 if (! poolAppendString(&parser
->m_tempPool
, prefix
))
4956 return XML_ERROR_NO_MEMORY
;
4957 if (! poolAppend(&parser
->m_tempPool
, enc
, s
, next
))
4958 return XML_ERROR_NO_MEMORY
;
4959 parser
->m_declAttributeType
= parser
->m_tempPool
.start
;
4960 handleDefault
= XML_FALSE
;
4963 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE
:
4964 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE
:
4965 if (dtd
->keepProcessing
) {
4966 if (! defineAttribute(parser
->m_declElementType
,
4967 parser
->m_declAttributeId
,
4968 parser
->m_declAttributeIsCdata
,
4969 parser
->m_declAttributeIsId
, 0, parser
))
4970 return XML_ERROR_NO_MEMORY
;
4971 if (parser
->m_attlistDeclHandler
&& parser
->m_declAttributeType
) {
4972 if (*parser
->m_declAttributeType
== XML_T(ASCII_LPAREN
)
4973 || (*parser
->m_declAttributeType
== XML_T(ASCII_N
)
4974 && parser
->m_declAttributeType
[1] == XML_T(ASCII_O
))) {
4975 /* Enumerated or Notation type */
4976 if (! poolAppendChar(&parser
->m_tempPool
, XML_T(ASCII_RPAREN
))
4977 || ! poolAppendChar(&parser
->m_tempPool
, XML_T('\0')))
4978 return XML_ERROR_NO_MEMORY
;
4979 parser
->m_declAttributeType
= parser
->m_tempPool
.start
;
4980 poolFinish(&parser
->m_tempPool
);
4983 parser
->m_attlistDeclHandler(
4984 parser
->m_handlerArg
, parser
->m_declElementType
->name
,
4985 parser
->m_declAttributeId
->name
, parser
->m_declAttributeType
, 0,
4986 role
== XML_ROLE_REQUIRED_ATTRIBUTE_VALUE
);
4987 handleDefault
= XML_FALSE
;
4990 poolClear(&parser
->m_tempPool
);
4992 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE
:
4993 case XML_ROLE_FIXED_ATTRIBUTE_VALUE
:
4994 if (dtd
->keepProcessing
) {
4995 const XML_Char
*attVal
;
4996 enum XML_Error result
= storeAttributeValue(
4997 parser
, enc
, parser
->m_declAttributeIsCdata
,
4998 s
+ enc
->minBytesPerChar
, next
- enc
->minBytesPerChar
, &dtd
->pool
,
5002 attVal
= poolStart(&dtd
->pool
);
5003 poolFinish(&dtd
->pool
);
5004 /* ID attributes aren't allowed to have a default */
5005 if (! defineAttribute(
5006 parser
->m_declElementType
, parser
->m_declAttributeId
,
5007 parser
->m_declAttributeIsCdata
, XML_FALSE
, attVal
, parser
))
5008 return XML_ERROR_NO_MEMORY
;
5009 if (parser
->m_attlistDeclHandler
&& parser
->m_declAttributeType
) {
5010 if (*parser
->m_declAttributeType
== XML_T(ASCII_LPAREN
)
5011 || (*parser
->m_declAttributeType
== XML_T(ASCII_N
)
5012 && parser
->m_declAttributeType
[1] == XML_T(ASCII_O
))) {
5013 /* Enumerated or Notation type */
5014 if (! poolAppendChar(&parser
->m_tempPool
, XML_T(ASCII_RPAREN
))
5015 || ! poolAppendChar(&parser
->m_tempPool
, XML_T('\0')))
5016 return XML_ERROR_NO_MEMORY
;
5017 parser
->m_declAttributeType
= parser
->m_tempPool
.start
;
5018 poolFinish(&parser
->m_tempPool
);
5021 parser
->m_attlistDeclHandler(
5022 parser
->m_handlerArg
, parser
->m_declElementType
->name
,
5023 parser
->m_declAttributeId
->name
, parser
->m_declAttributeType
,
5024 attVal
, role
== XML_ROLE_FIXED_ATTRIBUTE_VALUE
);
5025 poolClear(&parser
->m_tempPool
);
5026 handleDefault
= XML_FALSE
;
5030 case XML_ROLE_ENTITY_VALUE
:
5031 if (dtd
->keepProcessing
) {
5032 enum XML_Error result
5033 = storeEntityValue(parser
, enc
, s
+ enc
->minBytesPerChar
,
5034 next
- enc
->minBytesPerChar
, XML_ACCOUNT_NONE
);
5035 if (parser
->m_declEntity
) {
5036 parser
->m_declEntity
->textPtr
= poolStart(&dtd
->entityValuePool
);
5037 parser
->m_declEntity
->textLen
5038 = (int)(poolLength(&dtd
->entityValuePool
));
5039 poolFinish(&dtd
->entityValuePool
);
5040 if (parser
->m_entityDeclHandler
) {
5042 parser
->m_entityDeclHandler(
5043 parser
->m_handlerArg
, parser
->m_declEntity
->name
,
5044 parser
->m_declEntity
->is_param
, parser
->m_declEntity
->textPtr
,
5045 parser
->m_declEntity
->textLen
, parser
->m_curBase
, 0, 0, 0);
5046 handleDefault
= XML_FALSE
;
5049 poolDiscard(&dtd
->entityValuePool
);
5050 if (result
!= XML_ERROR_NONE
)
5054 case XML_ROLE_DOCTYPE_SYSTEM_ID
:
5056 parser
->m_useForeignDTD
= XML_FALSE
;
5057 #endif /* XML_DTD */
5058 dtd
->hasParamEntityRefs
= XML_TRUE
;
5059 if (parser
->m_startDoctypeDeclHandler
) {
5060 parser
->m_doctypeSysid
= poolStoreString(&parser
->m_tempPool
, enc
,
5061 s
+ enc
->minBytesPerChar
,
5062 next
- enc
->minBytesPerChar
);
5063 if (parser
->m_doctypeSysid
== NULL
)
5064 return XML_ERROR_NO_MEMORY
;
5065 poolFinish(&parser
->m_tempPool
);
5066 handleDefault
= XML_FALSE
;
5070 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5071 for the case where no parser->m_startDoctypeDeclHandler is set */
5072 parser
->m_doctypeSysid
= externalSubsetName
;
5073 #endif /* XML_DTD */
5074 if (! dtd
->standalone
5076 && ! parser
->m_paramEntityParsing
5077 #endif /* XML_DTD */
5078 && parser
->m_notStandaloneHandler
5079 && ! parser
->m_notStandaloneHandler(parser
->m_handlerArg
))
5080 return XML_ERROR_NOT_STANDALONE
;
5084 if (! parser
->m_declEntity
) {
5085 parser
->m_declEntity
= (ENTITY
*)lookup(
5086 parser
, &dtd
->paramEntities
, externalSubsetName
, sizeof(ENTITY
));
5087 if (! parser
->m_declEntity
)
5088 return XML_ERROR_NO_MEMORY
;
5089 parser
->m_declEntity
->publicId
= NULL
;
5091 #endif /* XML_DTD */
5093 case XML_ROLE_ENTITY_SYSTEM_ID
:
5094 if (dtd
->keepProcessing
&& parser
->m_declEntity
) {
5095 parser
->m_declEntity
->systemId
5096 = poolStoreString(&dtd
->pool
, enc
, s
+ enc
->minBytesPerChar
,
5097 next
- enc
->minBytesPerChar
);
5098 if (! parser
->m_declEntity
->systemId
)
5099 return XML_ERROR_NO_MEMORY
;
5100 parser
->m_declEntity
->base
= parser
->m_curBase
;
5101 poolFinish(&dtd
->pool
);
5102 /* Don't suppress the default handler if we fell through from
5103 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5105 if (parser
->m_entityDeclHandler
&& role
== XML_ROLE_ENTITY_SYSTEM_ID
)
5106 handleDefault
= XML_FALSE
;
5109 case XML_ROLE_ENTITY_COMPLETE
:
5110 if (dtd
->keepProcessing
&& parser
->m_declEntity
5111 && parser
->m_entityDeclHandler
) {
5113 parser
->m_entityDeclHandler(
5114 parser
->m_handlerArg
, parser
->m_declEntity
->name
,
5115 parser
->m_declEntity
->is_param
, 0, 0, parser
->m_declEntity
->base
,
5116 parser
->m_declEntity
->systemId
, parser
->m_declEntity
->publicId
, 0);
5117 handleDefault
= XML_FALSE
;
5120 case XML_ROLE_ENTITY_NOTATION_NAME
:
5121 if (dtd
->keepProcessing
&& parser
->m_declEntity
) {
5122 parser
->m_declEntity
->notation
5123 = poolStoreString(&dtd
->pool
, enc
, s
, next
);
5124 if (! parser
->m_declEntity
->notation
)
5125 return XML_ERROR_NO_MEMORY
;
5126 poolFinish(&dtd
->pool
);
5127 if (parser
->m_unparsedEntityDeclHandler
) {
5129 parser
->m_unparsedEntityDeclHandler(
5130 parser
->m_handlerArg
, parser
->m_declEntity
->name
,
5131 parser
->m_declEntity
->base
, parser
->m_declEntity
->systemId
,
5132 parser
->m_declEntity
->publicId
, parser
->m_declEntity
->notation
);
5133 handleDefault
= XML_FALSE
;
5134 } else if (parser
->m_entityDeclHandler
) {
5136 parser
->m_entityDeclHandler(
5137 parser
->m_handlerArg
, parser
->m_declEntity
->name
, 0, 0, 0,
5138 parser
->m_declEntity
->base
, parser
->m_declEntity
->systemId
,
5139 parser
->m_declEntity
->publicId
, parser
->m_declEntity
->notation
);
5140 handleDefault
= XML_FALSE
;
5144 case XML_ROLE_GENERAL_ENTITY_NAME
: {
5145 if (XmlPredefinedEntityName(enc
, s
, next
)) {
5146 parser
->m_declEntity
= NULL
;
5149 if (dtd
->keepProcessing
) {
5150 const XML_Char
*name
= poolStoreString(&dtd
->pool
, enc
, s
, next
);
5152 return XML_ERROR_NO_MEMORY
;
5153 parser
->m_declEntity
= (ENTITY
*)lookup(parser
, &dtd
->generalEntities
,
5154 name
, sizeof(ENTITY
));
5155 if (! parser
->m_declEntity
)
5156 return XML_ERROR_NO_MEMORY
;
5157 if (parser
->m_declEntity
->name
!= name
) {
5158 poolDiscard(&dtd
->pool
);
5159 parser
->m_declEntity
= NULL
;
5161 poolFinish(&dtd
->pool
);
5162 parser
->m_declEntity
->publicId
= NULL
;
5163 parser
->m_declEntity
->is_param
= XML_FALSE
;
5164 /* if we have a parent parser or are reading an internal parameter
5165 entity, then the entity declaration is not considered "internal"
5167 parser
->m_declEntity
->is_internal
5168 = ! (parser
->m_parentParser
|| parser
->m_openInternalEntities
);
5169 if (parser
->m_entityDeclHandler
)
5170 handleDefault
= XML_FALSE
;
5173 poolDiscard(&dtd
->pool
);
5174 parser
->m_declEntity
= NULL
;
5177 case XML_ROLE_PARAM_ENTITY_NAME
:
5179 if (dtd
->keepProcessing
) {
5180 const XML_Char
*name
= poolStoreString(&dtd
->pool
, enc
, s
, next
);
5182 return XML_ERROR_NO_MEMORY
;
5183 parser
->m_declEntity
= (ENTITY
*)lookup(parser
, &dtd
->paramEntities
,
5184 name
, sizeof(ENTITY
));
5185 if (! parser
->m_declEntity
)
5186 return XML_ERROR_NO_MEMORY
;
5187 if (parser
->m_declEntity
->name
!= name
) {
5188 poolDiscard(&dtd
->pool
);
5189 parser
->m_declEntity
= NULL
;
5191 poolFinish(&dtd
->pool
);
5192 parser
->m_declEntity
->publicId
= NULL
;
5193 parser
->m_declEntity
->is_param
= XML_TRUE
;
5194 /* if we have a parent parser or are reading an internal parameter
5195 entity, then the entity declaration is not considered "internal"
5197 parser
->m_declEntity
->is_internal
5198 = ! (parser
->m_parentParser
|| parser
->m_openInternalEntities
);
5199 if (parser
->m_entityDeclHandler
)
5200 handleDefault
= XML_FALSE
;
5203 poolDiscard(&dtd
->pool
);
5204 parser
->m_declEntity
= NULL
;
5206 #else /* not XML_DTD */
5207 parser
->m_declEntity
= NULL
;
5208 #endif /* XML_DTD */
5210 case XML_ROLE_NOTATION_NAME
:
5211 parser
->m_declNotationPublicId
= NULL
;
5212 parser
->m_declNotationName
= NULL
;
5213 if (parser
->m_notationDeclHandler
) {
5214 parser
->m_declNotationName
5215 = poolStoreString(&parser
->m_tempPool
, enc
, s
, next
);
5216 if (! parser
->m_declNotationName
)
5217 return XML_ERROR_NO_MEMORY
;
5218 poolFinish(&parser
->m_tempPool
);
5219 handleDefault
= XML_FALSE
;
5222 case XML_ROLE_NOTATION_PUBLIC_ID
:
5223 if (! XmlIsPublicId(enc
, s
, next
, eventPP
))
5224 return XML_ERROR_PUBLICID
;
5226 ->m_declNotationName
) { /* means m_notationDeclHandler != NULL */
5227 XML_Char
*tem
= poolStoreString(&parser
->m_tempPool
, enc
,
5228 s
+ enc
->minBytesPerChar
,
5229 next
- enc
->minBytesPerChar
);
5231 return XML_ERROR_NO_MEMORY
;
5232 normalizePublicId(tem
);
5233 parser
->m_declNotationPublicId
= tem
;
5234 poolFinish(&parser
->m_tempPool
);
5235 handleDefault
= XML_FALSE
;
5238 case XML_ROLE_NOTATION_SYSTEM_ID
:
5239 if (parser
->m_declNotationName
&& parser
->m_notationDeclHandler
) {
5240 const XML_Char
*systemId
= poolStoreString(&parser
->m_tempPool
, enc
,
5241 s
+ enc
->minBytesPerChar
,
5242 next
- enc
->minBytesPerChar
);
5244 return XML_ERROR_NO_MEMORY
;
5246 parser
->m_notationDeclHandler(
5247 parser
->m_handlerArg
, parser
->m_declNotationName
, parser
->m_curBase
,
5248 systemId
, parser
->m_declNotationPublicId
);
5249 handleDefault
= XML_FALSE
;
5251 poolClear(&parser
->m_tempPool
);
5253 case XML_ROLE_NOTATION_NO_SYSTEM_ID
:
5254 if (parser
->m_declNotationPublicId
&& parser
->m_notationDeclHandler
) {
5256 parser
->m_notationDeclHandler(
5257 parser
->m_handlerArg
, parser
->m_declNotationName
, parser
->m_curBase
,
5258 0, parser
->m_declNotationPublicId
);
5259 handleDefault
= XML_FALSE
;
5261 poolClear(&parser
->m_tempPool
);
5263 case XML_ROLE_ERROR
:
5265 case XML_TOK_PARAM_ENTITY_REF
:
5266 /* PE references in internal subset are
5267 not allowed within declarations. */
5268 return XML_ERROR_PARAM_ENTITY_REF
;
5269 case XML_TOK_XML_DECL
:
5270 return XML_ERROR_MISPLACED_XML_PI
;
5272 return XML_ERROR_SYNTAX
;
5275 case XML_ROLE_IGNORE_SECT
: {
5276 enum XML_Error result
;
5277 if (parser
->m_defaultHandler
)
5278 reportDefault(parser
, enc
, s
, next
);
5279 handleDefault
= XML_FALSE
;
5280 result
= doIgnoreSection(parser
, enc
, &next
, end
, nextPtr
, haveMore
);
5281 if (result
!= XML_ERROR_NONE
)
5284 parser
->m_processor
= ignoreSectionProcessor
;
5288 #endif /* XML_DTD */
5289 case XML_ROLE_GROUP_OPEN
:
5290 if (parser
->m_prologState
.level
>= parser
->m_groupSize
) {
5291 if (parser
->m_groupSize
) {
5293 /* Detect and prevent integer overflow */
5294 if (parser
->m_groupSize
> (unsigned int)(-1) / 2u) {
5295 return XML_ERROR_NO_MEMORY
;
5298 char *const new_connector
= (char *)REALLOC(
5299 parser
, parser
->m_groupConnector
, parser
->m_groupSize
*= 2);
5300 if (new_connector
== NULL
) {
5301 parser
->m_groupSize
/= 2;
5302 return XML_ERROR_NO_MEMORY
;
5304 parser
->m_groupConnector
= new_connector
;
5307 if (dtd
->scaffIndex
) {
5308 /* Detect and prevent integer overflow.
5309 * The preprocessor guard addresses the "always false" warning
5310 * from -Wtype-limits on platforms where
5311 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5312 #if UINT_MAX >= SIZE_MAX
5313 if (parser
->m_groupSize
> (size_t)(-1) / sizeof(int)) {
5314 return XML_ERROR_NO_MEMORY
;
5318 int *const new_scaff_index
= (int *)REALLOC(
5319 parser
, dtd
->scaffIndex
, parser
->m_groupSize
* sizeof(int));
5320 if (new_scaff_index
== NULL
)
5321 return XML_ERROR_NO_MEMORY
;
5322 dtd
->scaffIndex
= new_scaff_index
;
5325 parser
->m_groupConnector
5326 = (char *)MALLOC(parser
, parser
->m_groupSize
= 32);
5327 if (! parser
->m_groupConnector
) {
5328 parser
->m_groupSize
= 0;
5329 return XML_ERROR_NO_MEMORY
;
5333 parser
->m_groupConnector
[parser
->m_prologState
.level
] = 0;
5334 if (dtd
->in_eldecl
) {
5335 int myindex
= nextScaffoldPart(parser
);
5337 return XML_ERROR_NO_MEMORY
;
5338 assert(dtd
->scaffIndex
!= NULL
);
5339 dtd
->scaffIndex
[dtd
->scaffLevel
] = myindex
;
5341 dtd
->scaffold
[myindex
].type
= XML_CTYPE_SEQ
;
5342 if (parser
->m_elementDeclHandler
)
5343 handleDefault
= XML_FALSE
;
5346 case XML_ROLE_GROUP_SEQUENCE
:
5347 if (parser
->m_groupConnector
[parser
->m_prologState
.level
] == ASCII_PIPE
)
5348 return XML_ERROR_SYNTAX
;
5349 parser
->m_groupConnector
[parser
->m_prologState
.level
] = ASCII_COMMA
;
5350 if (dtd
->in_eldecl
&& parser
->m_elementDeclHandler
)
5351 handleDefault
= XML_FALSE
;
5353 case XML_ROLE_GROUP_CHOICE
:
5354 if (parser
->m_groupConnector
[parser
->m_prologState
.level
] == ASCII_COMMA
)
5355 return XML_ERROR_SYNTAX
;
5357 && ! parser
->m_groupConnector
[parser
->m_prologState
.level
]
5358 && (dtd
->scaffold
[dtd
->scaffIndex
[dtd
->scaffLevel
- 1]].type
5359 != XML_CTYPE_MIXED
)) {
5360 dtd
->scaffold
[dtd
->scaffIndex
[dtd
->scaffLevel
- 1]].type
5362 if (parser
->m_elementDeclHandler
)
5363 handleDefault
= XML_FALSE
;
5365 parser
->m_groupConnector
[parser
->m_prologState
.level
] = ASCII_PIPE
;
5367 case XML_ROLE_PARAM_ENTITY_REF
:
5369 case XML_ROLE_INNER_PARAM_ENTITY_REF
:
5370 dtd
->hasParamEntityRefs
= XML_TRUE
;
5371 if (! parser
->m_paramEntityParsing
)
5372 dtd
->keepProcessing
= dtd
->standalone
;
5374 const XML_Char
*name
;
5376 name
= poolStoreString(&dtd
->pool
, enc
, s
+ enc
->minBytesPerChar
,
5377 next
- enc
->minBytesPerChar
);
5379 return XML_ERROR_NO_MEMORY
;
5380 entity
= (ENTITY
*)lookup(parser
, &dtd
->paramEntities
, name
, 0);
5381 poolDiscard(&dtd
->pool
);
5382 /* first, determine if a check for an existing declaration is needed;
5383 if yes, check that the entity exists, and that it is internal,
5384 otherwise call the skipped entity handler
5386 if (parser
->m_prologState
.documentEntity
5387 && (dtd
->standalone
? ! parser
->m_openInternalEntities
5388 : ! dtd
->hasParamEntityRefs
)) {
5390 return XML_ERROR_UNDEFINED_ENTITY
;
5391 else if (! entity
->is_internal
) {
5392 /* It's hard to exhaustively search the code to be sure,
5393 * but there doesn't seem to be a way of executing the
5394 * following line. There are two cases:
5396 * If 'standalone' is false, the DTD must have no
5397 * parameter entities or we wouldn't have passed the outer
5398 * 'if' statement. That means the only entity in the hash
5399 * table is the external subset name "#" which cannot be
5400 * given as a parameter entity name in XML syntax, so the
5401 * lookup must have returned NULL and we don't even reach
5402 * the test for an internal entity.
5404 * If 'standalone' is true, it does not seem to be
5405 * possible to create entities taking this code path that
5406 * are not internal entities, so fail the test above.
5408 * Because this analysis is very uncertain, the code is
5409 * being left in place and merely removed from the
5410 * coverage test statistics.
5412 return XML_ERROR_ENTITY_DECLARED_IN_PE
; /* LCOV_EXCL_LINE */
5414 } else if (! entity
) {
5415 dtd
->keepProcessing
= dtd
->standalone
;
5416 /* cannot report skipped entities in declarations */
5417 if ((role
== XML_ROLE_PARAM_ENTITY_REF
)
5418 && parser
->m_skippedEntityHandler
) {
5419 parser
->m_skippedEntityHandler(parser
->m_handlerArg
, name
, 1);
5420 handleDefault
= XML_FALSE
;
5425 return XML_ERROR_RECURSIVE_ENTITY_REF
;
5426 if (entity
->textPtr
) {
5427 enum XML_Error result
;
5428 XML_Bool betweenDecl
5429 = (role
== XML_ROLE_PARAM_ENTITY_REF
? XML_TRUE
: XML_FALSE
);
5430 result
= processInternalEntity(parser
, entity
, betweenDecl
);
5431 if (result
!= XML_ERROR_NONE
)
5433 handleDefault
= XML_FALSE
;
5436 if (parser
->m_externalEntityRefHandler
) {
5437 dtd
->paramEntityRead
= XML_FALSE
;
5438 entity
->open
= XML_TRUE
;
5439 entityTrackingOnOpen(parser
, entity
, __LINE__
);
5440 if (! parser
->m_externalEntityRefHandler(
5441 parser
->m_externalEntityRefHandlerArg
, 0, entity
->base
,
5442 entity
->systemId
, entity
->publicId
)) {
5443 entityTrackingOnClose(parser
, entity
, __LINE__
);
5444 entity
->open
= XML_FALSE
;
5445 return XML_ERROR_EXTERNAL_ENTITY_HANDLING
;
5447 entityTrackingOnClose(parser
, entity
, __LINE__
);
5448 entity
->open
= XML_FALSE
;
5449 handleDefault
= XML_FALSE
;
5450 if (! dtd
->paramEntityRead
) {
5451 dtd
->keepProcessing
= dtd
->standalone
;
5455 dtd
->keepProcessing
= dtd
->standalone
;
5459 #endif /* XML_DTD */
5460 if (! dtd
->standalone
&& parser
->m_notStandaloneHandler
5461 && ! parser
->m_notStandaloneHandler(parser
->m_handlerArg
))
5462 return XML_ERROR_NOT_STANDALONE
;
5465 /* Element declaration stuff */
5467 case XML_ROLE_ELEMENT_NAME
:
5468 if (parser
->m_elementDeclHandler
) {
5469 parser
->m_declElementType
= getElementType(parser
, enc
, s
, next
);
5470 if (! parser
->m_declElementType
)
5471 return XML_ERROR_NO_MEMORY
;
5472 dtd
->scaffLevel
= 0;
5473 dtd
->scaffCount
= 0;
5474 dtd
->in_eldecl
= XML_TRUE
;
5475 handleDefault
= XML_FALSE
;
5479 case XML_ROLE_CONTENT_ANY
:
5480 case XML_ROLE_CONTENT_EMPTY
:
5481 if (dtd
->in_eldecl
) {
5482 if (parser
->m_elementDeclHandler
) {
5483 XML_Content
*content
5484 = (XML_Content
*)MALLOC(parser
, sizeof(XML_Content
));
5486 return XML_ERROR_NO_MEMORY
;
5487 content
->quant
= XML_CQUANT_NONE
;
5488 content
->name
= NULL
;
5489 content
->numchildren
= 0;
5490 content
->children
= NULL
;
5491 content
->type
= ((role
== XML_ROLE_CONTENT_ANY
) ? XML_CTYPE_ANY
5494 parser
->m_elementDeclHandler(
5495 parser
->m_handlerArg
, parser
->m_declElementType
->name
, content
);
5496 handleDefault
= XML_FALSE
;
5498 dtd
->in_eldecl
= XML_FALSE
;
5502 case XML_ROLE_CONTENT_PCDATA
:
5503 if (dtd
->in_eldecl
) {
5504 dtd
->scaffold
[dtd
->scaffIndex
[dtd
->scaffLevel
- 1]].type
5506 if (parser
->m_elementDeclHandler
)
5507 handleDefault
= XML_FALSE
;
5511 case XML_ROLE_CONTENT_ELEMENT
:
5512 quant
= XML_CQUANT_NONE
;
5513 goto elementContent
;
5514 case XML_ROLE_CONTENT_ELEMENT_OPT
:
5515 quant
= XML_CQUANT_OPT
;
5516 goto elementContent
;
5517 case XML_ROLE_CONTENT_ELEMENT_REP
:
5518 quant
= XML_CQUANT_REP
;
5519 goto elementContent
;
5520 case XML_ROLE_CONTENT_ELEMENT_PLUS
:
5521 quant
= XML_CQUANT_PLUS
;
5523 if (dtd
->in_eldecl
) {
5525 const XML_Char
*name
;
5528 = (quant
== XML_CQUANT_NONE
? next
: next
- enc
->minBytesPerChar
);
5529 int myindex
= nextScaffoldPart(parser
);
5531 return XML_ERROR_NO_MEMORY
;
5532 dtd
->scaffold
[myindex
].type
= XML_CTYPE_NAME
;
5533 dtd
->scaffold
[myindex
].quant
= quant
;
5534 el
= getElementType(parser
, enc
, s
, nxt
);
5536 return XML_ERROR_NO_MEMORY
;
5538 dtd
->scaffold
[myindex
].name
= name
;
5540 for (; name
[nameLen
++];)
5543 /* Detect and prevent integer overflow */
5544 if (nameLen
> UINT_MAX
- dtd
->contentStringLen
) {
5545 return XML_ERROR_NO_MEMORY
;
5548 dtd
->contentStringLen
+= (unsigned)nameLen
;
5549 if (parser
->m_elementDeclHandler
)
5550 handleDefault
= XML_FALSE
;
5554 case XML_ROLE_GROUP_CLOSE
:
5555 quant
= XML_CQUANT_NONE
;
5557 case XML_ROLE_GROUP_CLOSE_OPT
:
5558 quant
= XML_CQUANT_OPT
;
5560 case XML_ROLE_GROUP_CLOSE_REP
:
5561 quant
= XML_CQUANT_REP
;
5563 case XML_ROLE_GROUP_CLOSE_PLUS
:
5564 quant
= XML_CQUANT_PLUS
;
5566 if (dtd
->in_eldecl
) {
5567 if (parser
->m_elementDeclHandler
)
5568 handleDefault
= XML_FALSE
;
5570 dtd
->scaffold
[dtd
->scaffIndex
[dtd
->scaffLevel
]].quant
= quant
;
5571 if (dtd
->scaffLevel
== 0) {
5572 if (! handleDefault
) {
5573 XML_Content
*model
= build_model(parser
);
5575 return XML_ERROR_NO_MEMORY
;
5577 parser
->m_elementDeclHandler(
5578 parser
->m_handlerArg
, parser
->m_declElementType
->name
, model
);
5580 dtd
->in_eldecl
= XML_FALSE
;
5581 dtd
->contentStringLen
= 0;
5585 /* End element declaration stuff */
5588 if (! reportProcessingInstruction(parser
, enc
, s
, next
))
5589 return XML_ERROR_NO_MEMORY
;
5590 handleDefault
= XML_FALSE
;
5592 case XML_ROLE_COMMENT
:
5593 if (! reportComment(parser
, enc
, s
, next
))
5594 return XML_ERROR_NO_MEMORY
;
5595 handleDefault
= XML_FALSE
;
5600 handleDefault
= XML_FALSE
;
5604 case XML_ROLE_DOCTYPE_NONE
:
5605 if (parser
->m_startDoctypeDeclHandler
)
5606 handleDefault
= XML_FALSE
;
5608 case XML_ROLE_ENTITY_NONE
:
5609 if (dtd
->keepProcessing
&& parser
->m_entityDeclHandler
)
5610 handleDefault
= XML_FALSE
;
5612 case XML_ROLE_NOTATION_NONE
:
5613 if (parser
->m_notationDeclHandler
)
5614 handleDefault
= XML_FALSE
;
5616 case XML_ROLE_ATTLIST_NONE
:
5617 if (dtd
->keepProcessing
&& parser
->m_attlistDeclHandler
)
5618 handleDefault
= XML_FALSE
;
5620 case XML_ROLE_ELEMENT_NONE
:
5621 if (parser
->m_elementDeclHandler
)
5622 handleDefault
= XML_FALSE
;
5624 } /* end of big switch */
5626 if (handleDefault
&& parser
->m_defaultHandler
)
5627 reportDefault(parser
, enc
, s
, next
);
5629 switch (parser
->m_parsingStatus
.parsing
) {
5632 return XML_ERROR_NONE
;
5634 return XML_ERROR_ABORTED
;
5637 tok
= XmlPrologTok(enc
, s
, end
, &next
);
5643 static enum XML_Error PTRCALL
5644 epilogProcessor(XML_Parser parser
, const char *s
, const char *end
,
5645 const char **nextPtr
) {
5646 parser
->m_processor
= epilogProcessor
;
5647 parser
->m_eventPtr
= s
;
5649 const char *next
= NULL
;
5650 int tok
= XmlPrologTok(parser
->m_encoding
, s
, end
, &next
);
5652 if (! accountingDiffTolerated(parser
, tok
, s
, next
, __LINE__
,
5653 XML_ACCOUNT_DIRECT
)) {
5654 accountingOnAbort(parser
);
5655 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
5658 parser
->m_eventEndPtr
= next
;
5660 /* report partial linebreak - it might be the last token */
5661 case -XML_TOK_PROLOG_S
:
5662 if (parser
->m_defaultHandler
) {
5663 reportDefault(parser
, parser
->m_encoding
, s
, next
);
5664 if (parser
->m_parsingStatus
.parsing
== XML_FINISHED
)
5665 return XML_ERROR_ABORTED
;
5668 return XML_ERROR_NONE
;
5671 return XML_ERROR_NONE
;
5672 case XML_TOK_PROLOG_S
:
5673 if (parser
->m_defaultHandler
)
5674 reportDefault(parser
, parser
->m_encoding
, s
, next
);
5677 if (! reportProcessingInstruction(parser
, parser
->m_encoding
, s
, next
))
5678 return XML_ERROR_NO_MEMORY
;
5680 case XML_TOK_COMMENT
:
5681 if (! reportComment(parser
, parser
->m_encoding
, s
, next
))
5682 return XML_ERROR_NO_MEMORY
;
5684 case XML_TOK_INVALID
:
5685 parser
->m_eventPtr
= next
;
5686 return XML_ERROR_INVALID_TOKEN
;
5687 case XML_TOK_PARTIAL
:
5688 if (! parser
->m_parsingStatus
.finalBuffer
) {
5690 return XML_ERROR_NONE
;
5692 return XML_ERROR_UNCLOSED_TOKEN
;
5693 case XML_TOK_PARTIAL_CHAR
:
5694 if (! parser
->m_parsingStatus
.finalBuffer
) {
5696 return XML_ERROR_NONE
;
5698 return XML_ERROR_PARTIAL_CHAR
;
5700 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT
;
5702 parser
->m_eventPtr
= s
= next
;
5703 switch (parser
->m_parsingStatus
.parsing
) {
5706 return XML_ERROR_NONE
;
5708 return XML_ERROR_ABORTED
;
5714 static enum XML_Error
5715 processInternalEntity(XML_Parser parser
, ENTITY
*entity
, XML_Bool betweenDecl
) {
5716 const char *textStart
, *textEnd
;
5718 enum XML_Error result
;
5719 OPEN_INTERNAL_ENTITY
*openEntity
;
5721 if (parser
->m_freeInternalEntities
) {
5722 openEntity
= parser
->m_freeInternalEntities
;
5723 parser
->m_freeInternalEntities
= openEntity
->next
;
5726 = (OPEN_INTERNAL_ENTITY
*)MALLOC(parser
, sizeof(OPEN_INTERNAL_ENTITY
));
5728 return XML_ERROR_NO_MEMORY
;
5730 entity
->open
= XML_TRUE
;
5732 entityTrackingOnOpen(parser
, entity
, __LINE__
);
5734 entity
->processed
= 0;
5735 openEntity
->next
= parser
->m_openInternalEntities
;
5736 parser
->m_openInternalEntities
= openEntity
;
5737 openEntity
->entity
= entity
;
5738 openEntity
->startTagLevel
= parser
->m_tagLevel
;
5739 openEntity
->betweenDecl
= betweenDecl
;
5740 openEntity
->internalEventPtr
= NULL
;
5741 openEntity
->internalEventEndPtr
= NULL
;
5742 textStart
= (const char *)entity
->textPtr
;
5743 textEnd
= (const char *)(entity
->textPtr
+ entity
->textLen
);
5744 /* Set a safe default value in case 'next' does not get set */
5748 if (entity
->is_param
) {
5750 = XmlPrologTok(parser
->m_internalEncoding
, textStart
, textEnd
, &next
);
5751 result
= doProlog(parser
, parser
->m_internalEncoding
, textStart
, textEnd
,
5752 tok
, next
, &next
, XML_FALSE
, XML_FALSE
,
5753 XML_ACCOUNT_ENTITY_EXPANSION
);
5755 #endif /* XML_DTD */
5756 result
= doContent(parser
, parser
->m_tagLevel
, parser
->m_internalEncoding
,
5757 textStart
, textEnd
, &next
, XML_FALSE
,
5758 XML_ACCOUNT_ENTITY_EXPANSION
);
5760 if (result
== XML_ERROR_NONE
) {
5761 if (textEnd
!= next
&& parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
) {
5762 entity
->processed
= (int)(next
- textStart
);
5763 parser
->m_processor
= internalEntityProcessor
;
5766 entityTrackingOnClose(parser
, entity
, __LINE__
);
5767 #endif /* XML_DTD */
5768 entity
->open
= XML_FALSE
;
5769 parser
->m_openInternalEntities
= openEntity
->next
;
5770 /* put openEntity back in list of free instances */
5771 openEntity
->next
= parser
->m_freeInternalEntities
;
5772 parser
->m_freeInternalEntities
= openEntity
;
5778 static enum XML_Error PTRCALL
5779 internalEntityProcessor(XML_Parser parser
, const char *s
, const char *end
,
5780 const char **nextPtr
) {
5782 const char *textStart
, *textEnd
;
5784 enum XML_Error result
;
5785 OPEN_INTERNAL_ENTITY
*openEntity
= parser
->m_openInternalEntities
;
5787 return XML_ERROR_UNEXPECTED_STATE
;
5789 entity
= openEntity
->entity
;
5790 textStart
= ((const char *)entity
->textPtr
) + entity
->processed
;
5791 textEnd
= (const char *)(entity
->textPtr
+ entity
->textLen
);
5792 /* Set a safe default value in case 'next' does not get set */
5796 if (entity
->is_param
) {
5798 = XmlPrologTok(parser
->m_internalEncoding
, textStart
, textEnd
, &next
);
5799 result
= doProlog(parser
, parser
->m_internalEncoding
, textStart
, textEnd
,
5800 tok
, next
, &next
, XML_FALSE
, XML_TRUE
,
5801 XML_ACCOUNT_ENTITY_EXPANSION
);
5803 #endif /* XML_DTD */
5804 result
= doContent(parser
, openEntity
->startTagLevel
,
5805 parser
->m_internalEncoding
, textStart
, textEnd
, &next
,
5806 XML_FALSE
, XML_ACCOUNT_ENTITY_EXPANSION
);
5808 if (result
!= XML_ERROR_NONE
)
5811 if (textEnd
!= next
&& parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
) {
5812 entity
->processed
= (int)(next
- (const char *)entity
->textPtr
);
5817 entityTrackingOnClose(parser
, entity
, __LINE__
);
5819 entity
->open
= XML_FALSE
;
5820 parser
->m_openInternalEntities
= openEntity
->next
;
5821 /* put openEntity back in list of free instances */
5822 openEntity
->next
= parser
->m_freeInternalEntities
;
5823 parser
->m_freeInternalEntities
= openEntity
;
5825 // If there are more open entities we want to stop right here and have the
5826 // upcoming call to XML_ResumeParser continue with entity content, or it would
5827 // be ignored altogether.
5828 if (parser
->m_openInternalEntities
!= NULL
5829 && parser
->m_parsingStatus
.parsing
== XML_SUSPENDED
) {
5830 return XML_ERROR_NONE
;
5834 if (entity
->is_param
) {
5836 parser
->m_processor
= prologProcessor
;
5837 tok
= XmlPrologTok(parser
->m_encoding
, s
, end
, &next
);
5838 return doProlog(parser
, parser
->m_encoding
, s
, end
, tok
, next
, nextPtr
,
5839 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
, XML_TRUE
,
5840 XML_ACCOUNT_DIRECT
);
5842 #endif /* XML_DTD */
5844 parser
->m_processor
= contentProcessor
;
5845 /* see externalEntityContentProcessor vs contentProcessor */
5846 result
= doContent(parser
, parser
->m_parentParser
? 1 : 0,
5847 parser
->m_encoding
, s
, end
, nextPtr
,
5848 (XML_Bool
)! parser
->m_parsingStatus
.finalBuffer
,
5849 XML_ACCOUNT_DIRECT
);
5850 if (result
== XML_ERROR_NONE
) {
5851 if (! storeRawNames(parser
))
5852 return XML_ERROR_NO_MEMORY
;
5858 static enum XML_Error PTRCALL
5859 errorProcessor(XML_Parser parser
, const char *s
, const char *end
,
5860 const char **nextPtr
) {
5864 return parser
->m_errorCode
;
5867 static enum XML_Error
5868 storeAttributeValue(XML_Parser parser
, const ENCODING
*enc
, XML_Bool isCdata
,
5869 const char *ptr
, const char *end
, STRING_POOL
*pool
,
5870 enum XML_Account account
) {
5871 enum XML_Error result
5872 = appendAttributeValue(parser
, enc
, isCdata
, ptr
, end
, pool
, account
);
5875 if (! isCdata
&& poolLength(pool
) && poolLastChar(pool
) == 0x20)
5877 if (! poolAppendChar(pool
, XML_T('\0')))
5878 return XML_ERROR_NO_MEMORY
;
5879 return XML_ERROR_NONE
;
5882 static enum XML_Error
5883 appendAttributeValue(XML_Parser parser
, const ENCODING
*enc
, XML_Bool isCdata
,
5884 const char *ptr
, const char *end
, STRING_POOL
*pool
,
5885 enum XML_Account account
) {
5886 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
5893 = ptr
; /* XmlAttributeValueTok doesn't always set the last arg */
5894 int tok
= XmlAttributeValueTok(enc
, ptr
, end
, &next
);
5896 if (! accountingDiffTolerated(parser
, tok
, ptr
, next
, __LINE__
, account
)) {
5897 accountingOnAbort(parser
);
5898 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
5903 return XML_ERROR_NONE
;
5904 case XML_TOK_INVALID
:
5905 if (enc
== parser
->m_encoding
)
5906 parser
->m_eventPtr
= next
;
5907 return XML_ERROR_INVALID_TOKEN
;
5908 case XML_TOK_PARTIAL
:
5909 if (enc
== parser
->m_encoding
)
5910 parser
->m_eventPtr
= ptr
;
5911 return XML_ERROR_INVALID_TOKEN
;
5912 case XML_TOK_CHAR_REF
: {
5913 XML_Char buf
[XML_ENCODE_MAX
];
5915 int n
= XmlCharRefNumber(enc
, ptr
);
5917 if (enc
== parser
->m_encoding
)
5918 parser
->m_eventPtr
= ptr
;
5919 return XML_ERROR_BAD_CHAR_REF
;
5921 if (! isCdata
&& n
== 0x20 /* space */
5922 && (poolLength(pool
) == 0 || poolLastChar(pool
) == 0x20))
5924 n
= XmlEncode(n
, (ICHAR
*)buf
);
5925 /* The XmlEncode() functions can never return 0 here. That
5926 * error return happens if the code point passed in is either
5927 * negative or greater than or equal to 0x110000. The
5928 * XmlCharRefNumber() functions will all return a number
5929 * strictly less than 0x110000 or a negative value if an error
5930 * occurred. The negative value is intercepted above, so
5931 * XmlEncode() is never passed a value it might return an
5934 for (i
= 0; i
< n
; i
++) {
5935 if (! poolAppendChar(pool
, buf
[i
]))
5936 return XML_ERROR_NO_MEMORY
;
5939 case XML_TOK_DATA_CHARS
:
5940 if (! poolAppend(pool
, enc
, ptr
, next
))
5941 return XML_ERROR_NO_MEMORY
;
5943 case XML_TOK_TRAILING_CR
:
5944 next
= ptr
+ enc
->minBytesPerChar
;
5946 case XML_TOK_ATTRIBUTE_VALUE_S
:
5947 case XML_TOK_DATA_NEWLINE
:
5948 if (! isCdata
&& (poolLength(pool
) == 0 || poolLastChar(pool
) == 0x20))
5950 if (! poolAppendChar(pool
, 0x20))
5951 return XML_ERROR_NO_MEMORY
;
5953 case XML_TOK_ENTITY_REF
: {
5954 const XML_Char
*name
;
5956 char checkEntityDecl
;
5957 XML_Char ch
= (XML_Char
)XmlPredefinedEntityName(
5958 enc
, ptr
+ enc
->minBytesPerChar
, next
- enc
->minBytesPerChar
);
5961 /* NOTE: We are replacing 4-6 characters original input for 1 character
5962 * so there is no amplification and hence recording without
5964 accountingDiffTolerated(parser
, tok
, (char *)&ch
,
5965 ((char *)&ch
) + sizeof(XML_Char
), __LINE__
,
5966 XML_ACCOUNT_ENTITY_EXPANSION
);
5967 #endif /* XML_DTD */
5968 if (! poolAppendChar(pool
, ch
))
5969 return XML_ERROR_NO_MEMORY
;
5972 name
= poolStoreString(&parser
->m_temp2Pool
, enc
,
5973 ptr
+ enc
->minBytesPerChar
,
5974 next
- enc
->minBytesPerChar
);
5976 return XML_ERROR_NO_MEMORY
;
5977 entity
= (ENTITY
*)lookup(parser
, &dtd
->generalEntities
, name
, 0);
5978 poolDiscard(&parser
->m_temp2Pool
);
5979 /* First, determine if a check for an existing declaration is needed;
5980 if yes, check that the entity exists, and that it is internal.
5982 if (pool
== &dtd
->pool
) /* are we called from prolog? */
5985 parser
->m_prologState
.documentEntity
&&
5986 #endif /* XML_DTD */
5987 (dtd
->standalone
? ! parser
->m_openInternalEntities
5988 : ! dtd
->hasParamEntityRefs
);
5989 else /* if (pool == &parser->m_tempPool): we are called from content */
5990 checkEntityDecl
= ! dtd
->hasParamEntityRefs
|| dtd
->standalone
;
5991 if (checkEntityDecl
) {
5993 return XML_ERROR_UNDEFINED_ENTITY
;
5994 else if (! entity
->is_internal
)
5995 return XML_ERROR_ENTITY_DECLARED_IN_PE
;
5996 } else if (! entity
) {
5997 /* Cannot report skipped entity here - see comments on
5998 parser->m_skippedEntityHandler.
5999 if (parser->m_skippedEntityHandler)
6000 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6002 /* Cannot call the default handler because this would be
6003 out of sync with the call to the startElementHandler.
6004 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6005 reportDefault(parser, enc, ptr, next);
6010 if (enc
== parser
->m_encoding
) {
6011 /* It does not appear that this line can be executed.
6013 * The "if (entity->open)" check catches recursive entity
6014 * definitions. In order to be called with an open
6015 * entity, it must have gone through this code before and
6016 * been through the recursive call to
6017 * appendAttributeValue() some lines below. That call
6018 * sets the local encoding ("enc") to the parser's
6019 * internal encoding (internal_utf8 or internal_utf16),
6020 * which can never be the same as the principle encoding.
6021 * It doesn't appear there is another code path that gets
6022 * here with entity->open being TRUE.
6024 * Since it is not certain that this logic is watertight,
6025 * we keep the line and merely exclude it from coverage
6028 parser
->m_eventPtr
= ptr
; /* LCOV_EXCL_LINE */
6030 return XML_ERROR_RECURSIVE_ENTITY_REF
;
6032 if (entity
->notation
) {
6033 if (enc
== parser
->m_encoding
)
6034 parser
->m_eventPtr
= ptr
;
6035 return XML_ERROR_BINARY_ENTITY_REF
;
6037 if (! entity
->textPtr
) {
6038 if (enc
== parser
->m_encoding
)
6039 parser
->m_eventPtr
= ptr
;
6040 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
;
6042 enum XML_Error result
;
6043 const XML_Char
*textEnd
= entity
->textPtr
+ entity
->textLen
;
6044 entity
->open
= XML_TRUE
;
6046 entityTrackingOnOpen(parser
, entity
, __LINE__
);
6048 result
= appendAttributeValue(parser
, parser
->m_internalEncoding
,
6049 isCdata
, (const char *)entity
->textPtr
,
6050 (const char *)textEnd
, pool
,
6051 XML_ACCOUNT_ENTITY_EXPANSION
);
6053 entityTrackingOnClose(parser
, entity
, __LINE__
);
6055 entity
->open
= XML_FALSE
;
6061 /* The only token returned by XmlAttributeValueTok() that does
6062 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6063 * Getting that would require an entity name to contain an
6064 * incomplete XML character (e.g. \xE2\x82); however previous
6065 * tokenisers will have already recognised and rejected such
6066 * names before XmlAttributeValueTok() gets a look-in. This
6067 * default case should be retained as a safety net, but the code
6068 * excluded from coverage tests.
6072 if (enc
== parser
->m_encoding
)
6073 parser
->m_eventPtr
= ptr
;
6074 return XML_ERROR_UNEXPECTED_STATE
;
6075 /* LCOV_EXCL_STOP */
6082 static enum XML_Error
6083 storeEntityValue(XML_Parser parser
, const ENCODING
*enc
,
6084 const char *entityTextPtr
, const char *entityTextEnd
,
6085 enum XML_Account account
) {
6086 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
6087 STRING_POOL
*pool
= &(dtd
->entityValuePool
);
6088 enum XML_Error result
= XML_ERROR_NONE
;
6090 int oldInEntityValue
= parser
->m_prologState
.inEntityValue
;
6091 parser
->m_prologState
.inEntityValue
= 1;
6094 #endif /* XML_DTD */
6095 /* never return Null for the value argument in EntityDeclHandler,
6096 since this would indicate an external entity; therefore we
6097 have to make sure that entityValuePool.start is not null */
6098 if (! pool
->blocks
) {
6099 if (! poolGrow(pool
))
6100 return XML_ERROR_NO_MEMORY
;
6105 = entityTextPtr
; /* XmlEntityValueTok doesn't always set the last arg */
6106 int tok
= XmlEntityValueTok(enc
, entityTextPtr
, entityTextEnd
, &next
);
6109 if (! accountingDiffTolerated(parser
, tok
, entityTextPtr
, next
, __LINE__
,
6111 accountingOnAbort(parser
);
6112 result
= XML_ERROR_AMPLIFICATION_LIMIT_BREACH
;
6113 goto endEntityValue
;
6118 case XML_TOK_PARAM_ENTITY_REF
:
6120 if (parser
->m_isParamEntity
|| enc
!= parser
->m_encoding
) {
6121 const XML_Char
*name
;
6123 name
= poolStoreString(&parser
->m_tempPool
, enc
,
6124 entityTextPtr
+ enc
->minBytesPerChar
,
6125 next
- enc
->minBytesPerChar
);
6127 result
= XML_ERROR_NO_MEMORY
;
6128 goto endEntityValue
;
6130 entity
= (ENTITY
*)lookup(parser
, &dtd
->paramEntities
, name
, 0);
6131 poolDiscard(&parser
->m_tempPool
);
6133 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6134 /* cannot report skipped entity here - see comments on
6135 parser->m_skippedEntityHandler
6136 if (parser->m_skippedEntityHandler)
6137 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6139 dtd
->keepProcessing
= dtd
->standalone
;
6140 goto endEntityValue
;
6143 if (enc
== parser
->m_encoding
)
6144 parser
->m_eventPtr
= entityTextPtr
;
6145 result
= XML_ERROR_RECURSIVE_ENTITY_REF
;
6146 goto endEntityValue
;
6148 if (entity
->systemId
) {
6149 if (parser
->m_externalEntityRefHandler
) {
6150 dtd
->paramEntityRead
= XML_FALSE
;
6151 entity
->open
= XML_TRUE
;
6152 entityTrackingOnOpen(parser
, entity
, __LINE__
);
6153 if (! parser
->m_externalEntityRefHandler(
6154 parser
->m_externalEntityRefHandlerArg
, 0, entity
->base
,
6155 entity
->systemId
, entity
->publicId
)) {
6156 entityTrackingOnClose(parser
, entity
, __LINE__
);
6157 entity
->open
= XML_FALSE
;
6158 result
= XML_ERROR_EXTERNAL_ENTITY_HANDLING
;
6159 goto endEntityValue
;
6161 entityTrackingOnClose(parser
, entity
, __LINE__
);
6162 entity
->open
= XML_FALSE
;
6163 if (! dtd
->paramEntityRead
)
6164 dtd
->keepProcessing
= dtd
->standalone
;
6166 dtd
->keepProcessing
= dtd
->standalone
;
6168 entity
->open
= XML_TRUE
;
6169 entityTrackingOnOpen(parser
, entity
, __LINE__
);
6170 result
= storeEntityValue(
6171 parser
, parser
->m_internalEncoding
, (const char *)entity
->textPtr
,
6172 (const char *)(entity
->textPtr
+ entity
->textLen
),
6173 XML_ACCOUNT_ENTITY_EXPANSION
);
6174 entityTrackingOnClose(parser
, entity
, __LINE__
);
6175 entity
->open
= XML_FALSE
;
6177 goto endEntityValue
;
6181 #endif /* XML_DTD */
6182 /* In the internal subset, PE references are not legal
6183 within markup declarations, e.g entity values in this case. */
6184 parser
->m_eventPtr
= entityTextPtr
;
6185 result
= XML_ERROR_PARAM_ENTITY_REF
;
6186 goto endEntityValue
;
6188 result
= XML_ERROR_NONE
;
6189 goto endEntityValue
;
6190 case XML_TOK_ENTITY_REF
:
6191 case XML_TOK_DATA_CHARS
:
6192 if (! poolAppend(pool
, enc
, entityTextPtr
, next
)) {
6193 result
= XML_ERROR_NO_MEMORY
;
6194 goto endEntityValue
;
6197 case XML_TOK_TRAILING_CR
:
6198 next
= entityTextPtr
+ enc
->minBytesPerChar
;
6200 case XML_TOK_DATA_NEWLINE
:
6201 if (pool
->end
== pool
->ptr
&& ! poolGrow(pool
)) {
6202 result
= XML_ERROR_NO_MEMORY
;
6203 goto endEntityValue
;
6205 *(pool
->ptr
)++ = 0xA;
6207 case XML_TOK_CHAR_REF
: {
6208 XML_Char buf
[XML_ENCODE_MAX
];
6210 int n
= XmlCharRefNumber(enc
, entityTextPtr
);
6212 if (enc
== parser
->m_encoding
)
6213 parser
->m_eventPtr
= entityTextPtr
;
6214 result
= XML_ERROR_BAD_CHAR_REF
;
6215 goto endEntityValue
;
6217 n
= XmlEncode(n
, (ICHAR
*)buf
);
6218 /* The XmlEncode() functions can never return 0 here. That
6219 * error return happens if the code point passed in is either
6220 * negative or greater than or equal to 0x110000. The
6221 * XmlCharRefNumber() functions will all return a number
6222 * strictly less than 0x110000 or a negative value if an error
6223 * occurred. The negative value is intercepted above, so
6224 * XmlEncode() is never passed a value it might return an
6227 for (i
= 0; i
< n
; i
++) {
6228 if (pool
->end
== pool
->ptr
&& ! poolGrow(pool
)) {
6229 result
= XML_ERROR_NO_MEMORY
;
6230 goto endEntityValue
;
6232 *(pool
->ptr
)++ = buf
[i
];
6235 case XML_TOK_PARTIAL
:
6236 if (enc
== parser
->m_encoding
)
6237 parser
->m_eventPtr
= entityTextPtr
;
6238 result
= XML_ERROR_INVALID_TOKEN
;
6239 goto endEntityValue
;
6240 case XML_TOK_INVALID
:
6241 if (enc
== parser
->m_encoding
)
6242 parser
->m_eventPtr
= next
;
6243 result
= XML_ERROR_INVALID_TOKEN
;
6244 goto endEntityValue
;
6246 /* This default case should be unnecessary -- all the tokens
6247 * that XmlEntityValueTok() can return have their own explicit
6248 * cases -- but should be retained for safety. We do however
6249 * exclude it from the coverage statistics.
6253 if (enc
== parser
->m_encoding
)
6254 parser
->m_eventPtr
= entityTextPtr
;
6255 result
= XML_ERROR_UNEXPECTED_STATE
;
6256 goto endEntityValue
;
6257 /* LCOV_EXCL_STOP */
6259 entityTextPtr
= next
;
6263 parser
->m_prologState
.inEntityValue
= oldInEntityValue
;
6264 #endif /* XML_DTD */
6268 static void FASTCALL
6269 normalizeLines(XML_Char
*s
) {
6272 if (*s
== XML_T('\0'))
6290 reportProcessingInstruction(XML_Parser parser
, const ENCODING
*enc
,
6291 const char *start
, const char *end
) {
6292 const XML_Char
*target
;
6295 if (! parser
->m_processingInstructionHandler
) {
6296 if (parser
->m_defaultHandler
)
6297 reportDefault(parser
, enc
, start
, end
);
6300 start
+= enc
->minBytesPerChar
* 2;
6301 tem
= start
+ XmlNameLength(enc
, start
);
6302 target
= poolStoreString(&parser
->m_tempPool
, enc
, start
, tem
);
6305 poolFinish(&parser
->m_tempPool
);
6306 data
= poolStoreString(&parser
->m_tempPool
, enc
, XmlSkipS(enc
, tem
),
6307 end
- enc
->minBytesPerChar
* 2);
6310 normalizeLines(data
);
6311 parser
->m_processingInstructionHandler(parser
->m_handlerArg
, target
, data
);
6312 poolClear(&parser
->m_tempPool
);
6317 reportComment(XML_Parser parser
, const ENCODING
*enc
, const char *start
,
6320 if (! parser
->m_commentHandler
) {
6321 if (parser
->m_defaultHandler
)
6322 reportDefault(parser
, enc
, start
, end
);
6325 data
= poolStoreString(&parser
->m_tempPool
, enc
,
6326 start
+ enc
->minBytesPerChar
* 4,
6327 end
- enc
->minBytesPerChar
* 3);
6330 normalizeLines(data
);
6331 parser
->m_commentHandler(parser
->m_handlerArg
, data
);
6332 poolClear(&parser
->m_tempPool
);
6337 reportDefault(XML_Parser parser
, const ENCODING
*enc
, const char *s
,
6339 if (MUST_CONVERT(enc
, s
)) {
6340 enum XML_Convert_Result convert_res
;
6341 const char **eventPP
;
6342 const char **eventEndPP
;
6343 if (enc
== parser
->m_encoding
) {
6344 eventPP
= &parser
->m_eventPtr
;
6345 eventEndPP
= &parser
->m_eventEndPtr
;
6347 /* To get here, two things must be true; the parser must be
6348 * using a character encoding that is not the same as the
6349 * encoding passed in, and the encoding passed in must need
6350 * conversion to the internal format (UTF-8 unless XML_UNICODE
6351 * is defined). The only occasions on which the encoding passed
6352 * in is not the same as the parser's encoding are when it is
6353 * the internal encoding (e.g. a previously defined parameter
6354 * entity, already converted to internal format). This by
6355 * definition doesn't need conversion, so the whole branch never
6358 * For safety's sake we don't delete these lines and merely
6359 * exclude them from coverage statistics.
6363 eventPP
= &(parser
->m_openInternalEntities
->internalEventPtr
);
6364 eventEndPP
= &(parser
->m_openInternalEntities
->internalEventEndPtr
);
6365 /* LCOV_EXCL_STOP */
6368 ICHAR
*dataPtr
= (ICHAR
*)parser
->m_dataBuf
;
6370 = XmlConvert(enc
, &s
, end
, &dataPtr
, (ICHAR
*)parser
->m_dataBufEnd
);
6372 parser
->m_defaultHandler(parser
->m_handlerArg
, parser
->m_dataBuf
,
6373 (int)(dataPtr
- (ICHAR
*)parser
->m_dataBuf
));
6375 } while ((convert_res
!= XML_CONVERT_COMPLETED
)
6376 && (convert_res
!= XML_CONVERT_INPUT_INCOMPLETE
));
6378 parser
->m_defaultHandler(parser
->m_handlerArg
, (XML_Char
*)s
,
6379 (int)((XML_Char
*)end
- (XML_Char
*)s
));
6383 defineAttribute(ELEMENT_TYPE
*type
, ATTRIBUTE_ID
*attId
, XML_Bool isCdata
,
6384 XML_Bool isId
, const XML_Char
*value
, XML_Parser parser
) {
6385 DEFAULT_ATTRIBUTE
*att
;
6386 if (value
|| isId
) {
6387 /* The handling of default attributes gets messed up if we have
6388 a default which duplicates a non-default. */
6390 for (i
= 0; i
< type
->nDefaultAtts
; i
++)
6391 if (attId
== type
->defaultAtts
[i
].id
)
6393 if (isId
&& ! type
->idAtt
&& ! attId
->xmlns
)
6394 type
->idAtt
= attId
;
6396 if (type
->nDefaultAtts
== type
->allocDefaultAtts
) {
6397 if (type
->allocDefaultAtts
== 0) {
6398 type
->allocDefaultAtts
= 8;
6399 type
->defaultAtts
= (DEFAULT_ATTRIBUTE
*)MALLOC(
6400 parser
, type
->allocDefaultAtts
* sizeof(DEFAULT_ATTRIBUTE
));
6401 if (! type
->defaultAtts
) {
6402 type
->allocDefaultAtts
= 0;
6406 DEFAULT_ATTRIBUTE
*temp
;
6408 /* Detect and prevent integer overflow */
6409 if (type
->allocDefaultAtts
> INT_MAX
/ 2) {
6413 int count
= type
->allocDefaultAtts
* 2;
6415 /* Detect and prevent integer overflow.
6416 * The preprocessor guard addresses the "always false" warning
6417 * from -Wtype-limits on platforms where
6418 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6419 #if UINT_MAX >= SIZE_MAX
6420 if ((unsigned)count
> (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE
)) {
6425 temp
= (DEFAULT_ATTRIBUTE
*)REALLOC(parser
, type
->defaultAtts
,
6426 (count
* sizeof(DEFAULT_ATTRIBUTE
)));
6429 type
->allocDefaultAtts
= count
;
6430 type
->defaultAtts
= temp
;
6433 att
= type
->defaultAtts
+ type
->nDefaultAtts
;
6436 att
->isCdata
= isCdata
;
6438 attId
->maybeTokenized
= XML_TRUE
;
6439 type
->nDefaultAtts
+= 1;
6444 setElementTypePrefix(XML_Parser parser
, ELEMENT_TYPE
*elementType
) {
6445 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
6446 const XML_Char
*name
;
6447 for (name
= elementType
->name
; *name
; name
++) {
6448 if (*name
== XML_T(ASCII_COLON
)) {
6451 for (s
= elementType
->name
; s
!= name
; s
++) {
6452 if (! poolAppendChar(&dtd
->pool
, *s
))
6455 if (! poolAppendChar(&dtd
->pool
, XML_T('\0')))
6457 prefix
= (PREFIX
*)lookup(parser
, &dtd
->prefixes
, poolStart(&dtd
->pool
),
6461 if (prefix
->name
== poolStart(&dtd
->pool
))
6462 poolFinish(&dtd
->pool
);
6464 poolDiscard(&dtd
->pool
);
6465 elementType
->prefix
= prefix
;
6472 static ATTRIBUTE_ID
*
6473 getAttributeId(XML_Parser parser
, const ENCODING
*enc
, const char *start
,
6475 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
6477 const XML_Char
*name
;
6478 if (! poolAppendChar(&dtd
->pool
, XML_T('\0')))
6480 name
= poolStoreString(&dtd
->pool
, enc
, start
, end
);
6483 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6485 id
= (ATTRIBUTE_ID
*)lookup(parser
, &dtd
->attributeIds
, name
,
6486 sizeof(ATTRIBUTE_ID
));
6489 if (id
->name
!= name
)
6490 poolDiscard(&dtd
->pool
);
6492 poolFinish(&dtd
->pool
);
6495 else if (name
[0] == XML_T(ASCII_x
) && name
[1] == XML_T(ASCII_m
)
6496 && name
[2] == XML_T(ASCII_l
) && name
[3] == XML_T(ASCII_n
)
6497 && name
[4] == XML_T(ASCII_s
)
6498 && (name
[5] == XML_T('\0') || name
[5] == XML_T(ASCII_COLON
))) {
6499 if (name
[5] == XML_T('\0'))
6500 id
->prefix
= &dtd
->defaultPrefix
;
6502 id
->prefix
= (PREFIX
*)lookup(parser
, &dtd
->prefixes
, name
+ 6,
6504 id
->xmlns
= XML_TRUE
;
6507 for (i
= 0; name
[i
]; i
++) {
6508 /* attributes without prefix are *not* in the default namespace */
6509 if (name
[i
] == XML_T(ASCII_COLON
)) {
6511 for (j
= 0; j
< i
; j
++) {
6512 if (! poolAppendChar(&dtd
->pool
, name
[j
]))
6515 if (! poolAppendChar(&dtd
->pool
, XML_T('\0')))
6517 id
->prefix
= (PREFIX
*)lookup(parser
, &dtd
->prefixes
,
6518 poolStart(&dtd
->pool
), sizeof(PREFIX
));
6521 if (id
->prefix
->name
== poolStart(&dtd
->pool
))
6522 poolFinish(&dtd
->pool
);
6524 poolDiscard(&dtd
->pool
);
6533 #define CONTEXT_SEP XML_T(ASCII_FF)
6535 static const XML_Char
*
6536 getContext(XML_Parser parser
) {
6537 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
6538 HASH_TABLE_ITER iter
;
6539 XML_Bool needSep
= XML_FALSE
;
6541 if (dtd
->defaultPrefix
.binding
) {
6544 if (! poolAppendChar(&parser
->m_tempPool
, XML_T(ASCII_EQUALS
)))
6546 len
= dtd
->defaultPrefix
.binding
->uriLen
;
6547 if (parser
->m_namespaceSeparator
)
6549 for (i
= 0; i
< len
; i
++) {
6550 if (! poolAppendChar(&parser
->m_tempPool
,
6551 dtd
->defaultPrefix
.binding
->uri
[i
])) {
6552 /* Because of memory caching, I don't believe this line can be
6555 * This is part of a loop copying the default prefix binding
6556 * URI into the parser's temporary string pool. Previously,
6557 * that URI was copied into the same string pool, with a
6558 * terminating NUL character, as part of setContext(). When
6559 * the pool was cleared, that leaves a block definitely big
6560 * enough to hold the URI on the free block list of the pool.
6561 * The URI copy in getContext() therefore cannot run out of
6564 * If the pool is used between the setContext() and
6565 * getContext() calls, the worst it can do is leave a bigger
6566 * block on the front of the free list. Given that this is
6567 * all somewhat inobvious and program logic can be changed, we
6568 * don't delete the line but we do exclude it from the test
6569 * coverage statistics.
6571 return NULL
; /* LCOV_EXCL_LINE */
6577 hashTableIterInit(&iter
, &(dtd
->prefixes
));
6582 PREFIX
*prefix
= (PREFIX
*)hashTableIterNext(&iter
);
6585 if (! prefix
->binding
) {
6586 /* This test appears to be (justifiable) paranoia. There does
6587 * not seem to be a way of injecting a prefix without a binding
6588 * that doesn't get errored long before this function is called.
6589 * The test should remain for safety's sake, so we instead
6590 * exclude the following line from the coverage statistics.
6592 continue; /* LCOV_EXCL_LINE */
6594 if (needSep
&& ! poolAppendChar(&parser
->m_tempPool
, CONTEXT_SEP
))
6596 for (s
= prefix
->name
; *s
; s
++)
6597 if (! poolAppendChar(&parser
->m_tempPool
, *s
))
6599 if (! poolAppendChar(&parser
->m_tempPool
, XML_T(ASCII_EQUALS
)))
6601 len
= prefix
->binding
->uriLen
;
6602 if (parser
->m_namespaceSeparator
)
6604 for (i
= 0; i
< len
; i
++)
6605 if (! poolAppendChar(&parser
->m_tempPool
, prefix
->binding
->uri
[i
]))
6610 hashTableIterInit(&iter
, &(dtd
->generalEntities
));
6613 ENTITY
*e
= (ENTITY
*)hashTableIterNext(&iter
);
6618 if (needSep
&& ! poolAppendChar(&parser
->m_tempPool
, CONTEXT_SEP
))
6620 for (s
= e
->name
; *s
; s
++)
6621 if (! poolAppendChar(&parser
->m_tempPool
, *s
))
6626 if (! poolAppendChar(&parser
->m_tempPool
, XML_T('\0')))
6628 return parser
->m_tempPool
.start
;
6632 setContext(XML_Parser parser
, const XML_Char
*context
) {
6633 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
6634 const XML_Char
*s
= context
;
6636 while (*context
!= XML_T('\0')) {
6637 if (*s
== CONTEXT_SEP
|| *s
== XML_T('\0')) {
6639 if (! poolAppendChar(&parser
->m_tempPool
, XML_T('\0')))
6641 e
= (ENTITY
*)lookup(parser
, &dtd
->generalEntities
,
6642 poolStart(&parser
->m_tempPool
), 0);
6645 if (*s
!= XML_T('\0'))
6648 poolDiscard(&parser
->m_tempPool
);
6649 } else if (*s
== XML_T(ASCII_EQUALS
)) {
6651 if (poolLength(&parser
->m_tempPool
) == 0)
6652 prefix
= &dtd
->defaultPrefix
;
6654 if (! poolAppendChar(&parser
->m_tempPool
, XML_T('\0')))
6657 = (PREFIX
*)lookup(parser
, &dtd
->prefixes
,
6658 poolStart(&parser
->m_tempPool
), sizeof(PREFIX
));
6661 if (prefix
->name
== poolStart(&parser
->m_tempPool
)) {
6662 prefix
->name
= poolCopyString(&dtd
->pool
, prefix
->name
);
6666 poolDiscard(&parser
->m_tempPool
);
6668 for (context
= s
+ 1; *context
!= CONTEXT_SEP
&& *context
!= XML_T('\0');
6670 if (! poolAppendChar(&parser
->m_tempPool
, *context
))
6672 if (! poolAppendChar(&parser
->m_tempPool
, XML_T('\0')))
6674 if (addBinding(parser
, prefix
, NULL
, poolStart(&parser
->m_tempPool
),
6675 &parser
->m_inheritedBindings
)
6678 poolDiscard(&parser
->m_tempPool
);
6679 if (*context
!= XML_T('\0'))
6683 if (! poolAppendChar(&parser
->m_tempPool
, *s
))
6691 static void FASTCALL
6692 normalizePublicId(XML_Char
*publicId
) {
6693 XML_Char
*p
= publicId
;
6695 for (s
= publicId
; *s
; s
++) {
6700 if (p
!= publicId
&& p
[-1] != 0x20)
6707 if (p
!= publicId
&& p
[-1] == 0x20)
6713 dtdCreate(const XML_Memory_Handling_Suite
*ms
) {
6714 DTD
*p
= ms
->malloc_fcn(sizeof(DTD
));
6717 poolInit(&(p
->pool
), ms
);
6718 poolInit(&(p
->entityValuePool
), ms
);
6719 hashTableInit(&(p
->generalEntities
), ms
);
6720 hashTableInit(&(p
->elementTypes
), ms
);
6721 hashTableInit(&(p
->attributeIds
), ms
);
6722 hashTableInit(&(p
->prefixes
), ms
);
6724 p
->paramEntityRead
= XML_FALSE
;
6725 hashTableInit(&(p
->paramEntities
), ms
);
6726 #endif /* XML_DTD */
6727 p
->defaultPrefix
.name
= NULL
;
6728 p
->defaultPrefix
.binding
= NULL
;
6730 p
->in_eldecl
= XML_FALSE
;
6731 p
->scaffIndex
= NULL
;
6736 p
->contentStringLen
= 0;
6738 p
->keepProcessing
= XML_TRUE
;
6739 p
->hasParamEntityRefs
= XML_FALSE
;
6740 p
->standalone
= XML_FALSE
;
6745 dtdReset(DTD
*p
, const XML_Memory_Handling_Suite
*ms
) {
6746 HASH_TABLE_ITER iter
;
6747 hashTableIterInit(&iter
, &(p
->elementTypes
));
6749 ELEMENT_TYPE
*e
= (ELEMENT_TYPE
*)hashTableIterNext(&iter
);
6752 if (e
->allocDefaultAtts
!= 0)
6753 ms
->free_fcn(e
->defaultAtts
);
6755 hashTableClear(&(p
->generalEntities
));
6757 p
->paramEntityRead
= XML_FALSE
;
6758 hashTableClear(&(p
->paramEntities
));
6759 #endif /* XML_DTD */
6760 hashTableClear(&(p
->elementTypes
));
6761 hashTableClear(&(p
->attributeIds
));
6762 hashTableClear(&(p
->prefixes
));
6763 poolClear(&(p
->pool
));
6764 poolClear(&(p
->entityValuePool
));
6765 p
->defaultPrefix
.name
= NULL
;
6766 p
->defaultPrefix
.binding
= NULL
;
6768 p
->in_eldecl
= XML_FALSE
;
6770 ms
->free_fcn(p
->scaffIndex
);
6771 p
->scaffIndex
= NULL
;
6772 ms
->free_fcn(p
->scaffold
);
6778 p
->contentStringLen
= 0;
6780 p
->keepProcessing
= XML_TRUE
;
6781 p
->hasParamEntityRefs
= XML_FALSE
;
6782 p
->standalone
= XML_FALSE
;
6786 dtdDestroy(DTD
*p
, XML_Bool isDocEntity
, const XML_Memory_Handling_Suite
*ms
) {
6787 HASH_TABLE_ITER iter
;
6788 hashTableIterInit(&iter
, &(p
->elementTypes
));
6790 ELEMENT_TYPE
*e
= (ELEMENT_TYPE
*)hashTableIterNext(&iter
);
6793 if (e
->allocDefaultAtts
!= 0)
6794 ms
->free_fcn(e
->defaultAtts
);
6796 hashTableDestroy(&(p
->generalEntities
));
6798 hashTableDestroy(&(p
->paramEntities
));
6799 #endif /* XML_DTD */
6800 hashTableDestroy(&(p
->elementTypes
));
6801 hashTableDestroy(&(p
->attributeIds
));
6802 hashTableDestroy(&(p
->prefixes
));
6803 poolDestroy(&(p
->pool
));
6804 poolDestroy(&(p
->entityValuePool
));
6806 ms
->free_fcn(p
->scaffIndex
);
6807 ms
->free_fcn(p
->scaffold
);
6812 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6813 The new DTD has already been initialized.
6816 dtdCopy(XML_Parser oldParser
, DTD
*newDtd
, const DTD
*oldDtd
,
6817 const XML_Memory_Handling_Suite
*ms
) {
6818 HASH_TABLE_ITER iter
;
6820 /* Copy the prefix table. */
6822 hashTableIterInit(&iter
, &(oldDtd
->prefixes
));
6824 const XML_Char
*name
;
6825 const PREFIX
*oldP
= (PREFIX
*)hashTableIterNext(&iter
);
6828 name
= poolCopyString(&(newDtd
->pool
), oldP
->name
);
6831 if (! lookup(oldParser
, &(newDtd
->prefixes
), name
, sizeof(PREFIX
)))
6835 hashTableIterInit(&iter
, &(oldDtd
->attributeIds
));
6837 /* Copy the attribute id table. */
6841 const XML_Char
*name
;
6842 const ATTRIBUTE_ID
*oldA
= (ATTRIBUTE_ID
*)hashTableIterNext(&iter
);
6846 /* Remember to allocate the scratch byte before the name. */
6847 if (! poolAppendChar(&(newDtd
->pool
), XML_T('\0')))
6849 name
= poolCopyString(&(newDtd
->pool
), oldA
->name
);
6853 newA
= (ATTRIBUTE_ID
*)lookup(oldParser
, &(newDtd
->attributeIds
), name
,
6854 sizeof(ATTRIBUTE_ID
));
6857 newA
->maybeTokenized
= oldA
->maybeTokenized
;
6859 newA
->xmlns
= oldA
->xmlns
;
6860 if (oldA
->prefix
== &oldDtd
->defaultPrefix
)
6861 newA
->prefix
= &newDtd
->defaultPrefix
;
6863 newA
->prefix
= (PREFIX
*)lookup(oldParser
, &(newDtd
->prefixes
),
6864 oldA
->prefix
->name
, 0);
6868 /* Copy the element type table. */
6870 hashTableIterInit(&iter
, &(oldDtd
->elementTypes
));
6875 const XML_Char
*name
;
6876 const ELEMENT_TYPE
*oldE
= (ELEMENT_TYPE
*)hashTableIterNext(&iter
);
6879 name
= poolCopyString(&(newDtd
->pool
), oldE
->name
);
6882 newE
= (ELEMENT_TYPE
*)lookup(oldParser
, &(newDtd
->elementTypes
), name
,
6883 sizeof(ELEMENT_TYPE
));
6886 if (oldE
->nDefaultAtts
) {
6888 = ms
->malloc_fcn(oldE
->nDefaultAtts
* sizeof(DEFAULT_ATTRIBUTE
));
6889 if (! newE
->defaultAtts
) {
6894 newE
->idAtt
= (ATTRIBUTE_ID
*)lookup(oldParser
, &(newDtd
->attributeIds
),
6895 oldE
->idAtt
->name
, 0);
6896 newE
->allocDefaultAtts
= newE
->nDefaultAtts
= oldE
->nDefaultAtts
;
6898 newE
->prefix
= (PREFIX
*)lookup(oldParser
, &(newDtd
->prefixes
),
6899 oldE
->prefix
->name
, 0);
6900 for (i
= 0; i
< newE
->nDefaultAtts
; i
++) {
6901 newE
->defaultAtts
[i
].id
= (ATTRIBUTE_ID
*)lookup(
6902 oldParser
, &(newDtd
->attributeIds
), oldE
->defaultAtts
[i
].id
->name
, 0);
6903 newE
->defaultAtts
[i
].isCdata
= oldE
->defaultAtts
[i
].isCdata
;
6904 if (oldE
->defaultAtts
[i
].value
) {
6905 newE
->defaultAtts
[i
].value
6906 = poolCopyString(&(newDtd
->pool
), oldE
->defaultAtts
[i
].value
);
6907 if (! newE
->defaultAtts
[i
].value
)
6910 newE
->defaultAtts
[i
].value
= NULL
;
6914 /* Copy the entity tables. */
6915 if (! copyEntityTable(oldParser
, &(newDtd
->generalEntities
), &(newDtd
->pool
),
6916 &(oldDtd
->generalEntities
)))
6920 if (! copyEntityTable(oldParser
, &(newDtd
->paramEntities
), &(newDtd
->pool
),
6921 &(oldDtd
->paramEntities
)))
6923 newDtd
->paramEntityRead
= oldDtd
->paramEntityRead
;
6924 #endif /* XML_DTD */
6926 newDtd
->keepProcessing
= oldDtd
->keepProcessing
;
6927 newDtd
->hasParamEntityRefs
= oldDtd
->hasParamEntityRefs
;
6928 newDtd
->standalone
= oldDtd
->standalone
;
6930 /* Don't want deep copying for scaffolding */
6931 newDtd
->in_eldecl
= oldDtd
->in_eldecl
;
6932 newDtd
->scaffold
= oldDtd
->scaffold
;
6933 newDtd
->contentStringLen
= oldDtd
->contentStringLen
;
6934 newDtd
->scaffSize
= oldDtd
->scaffSize
;
6935 newDtd
->scaffLevel
= oldDtd
->scaffLevel
;
6936 newDtd
->scaffIndex
= oldDtd
->scaffIndex
;
6942 copyEntityTable(XML_Parser oldParser
, HASH_TABLE
*newTable
,
6943 STRING_POOL
*newPool
, const HASH_TABLE
*oldTable
) {
6944 HASH_TABLE_ITER iter
;
6945 const XML_Char
*cachedOldBase
= NULL
;
6946 const XML_Char
*cachedNewBase
= NULL
;
6948 hashTableIterInit(&iter
, oldTable
);
6952 const XML_Char
*name
;
6953 const ENTITY
*oldE
= (ENTITY
*)hashTableIterNext(&iter
);
6956 name
= poolCopyString(newPool
, oldE
->name
);
6959 newE
= (ENTITY
*)lookup(oldParser
, newTable
, name
, sizeof(ENTITY
));
6962 if (oldE
->systemId
) {
6963 const XML_Char
*tem
= poolCopyString(newPool
, oldE
->systemId
);
6966 newE
->systemId
= tem
;
6968 if (oldE
->base
== cachedOldBase
)
6969 newE
->base
= cachedNewBase
;
6971 cachedOldBase
= oldE
->base
;
6972 tem
= poolCopyString(newPool
, cachedOldBase
);
6975 cachedNewBase
= newE
->base
= tem
;
6978 if (oldE
->publicId
) {
6979 tem
= poolCopyString(newPool
, oldE
->publicId
);
6982 newE
->publicId
= tem
;
6986 = poolCopyStringN(newPool
, oldE
->textPtr
, oldE
->textLen
);
6989 newE
->textPtr
= tem
;
6990 newE
->textLen
= oldE
->textLen
;
6992 if (oldE
->notation
) {
6993 const XML_Char
*tem
= poolCopyString(newPool
, oldE
->notation
);
6996 newE
->notation
= tem
;
6998 newE
->is_param
= oldE
->is_param
;
6999 newE
->is_internal
= oldE
->is_internal
;
7004 #define INIT_POWER 6
7006 static XML_Bool FASTCALL
7007 keyeq(KEY s1
, KEY s2
) {
7008 for (; *s1
== *s2
; s1
++, s2
++)
7017 for (; *s
; s
++, len
++)
7023 copy_salt_to_sipkey(XML_Parser parser
, struct sipkey
*key
) {
7025 key
->k
[1] = get_hash_secret_salt(parser
);
7028 static unsigned long FASTCALL
7029 hash(XML_Parser parser
, KEY s
) {
7030 struct siphash state
;
7033 copy_salt_to_sipkey(parser
, &key
);
7034 sip24_init(&state
, &key
);
7035 sip24_update(&state
, s
, keylen(s
) * sizeof(XML_Char
));
7036 return (unsigned long)sip24_final(&state
);
7040 lookup(XML_Parser parser
, HASH_TABLE
*table
, KEY name
, size_t createSize
) {
7042 if (table
->size
== 0) {
7046 table
->power
= INIT_POWER
;
7047 /* table->size is a power of 2 */
7048 table
->size
= (size_t)1 << INIT_POWER
;
7049 tsize
= table
->size
* sizeof(NAMED
*);
7050 table
->v
= table
->mem
->malloc_fcn(tsize
);
7055 memset(table
->v
, 0, tsize
);
7056 i
= hash(parser
, name
) & ((unsigned long)table
->size
- 1);
7058 unsigned long h
= hash(parser
, name
);
7059 unsigned long mask
= (unsigned long)table
->size
- 1;
7060 unsigned char step
= 0;
7062 while (table
->v
[i
]) {
7063 if (keyeq(name
, table
->v
[i
]->name
))
7066 step
= PROBE_STEP(h
, mask
, table
->power
);
7067 i
< step
? (i
+= table
->size
- step
) : (i
-= step
);
7072 /* check for overflow (table is half full) */
7073 if (table
->used
>> (table
->power
- 1)) {
7074 unsigned char newPower
= table
->power
+ 1;
7076 /* Detect and prevent invalid shift */
7077 if (newPower
>= sizeof(unsigned long) * 8 /* bits per byte */) {
7081 size_t newSize
= (size_t)1 << newPower
;
7082 unsigned long newMask
= (unsigned long)newSize
- 1;
7084 /* Detect and prevent integer overflow */
7085 if (newSize
> (size_t)(-1) / sizeof(NAMED
*)) {
7089 size_t tsize
= newSize
* sizeof(NAMED
*);
7090 NAMED
**newV
= table
->mem
->malloc_fcn(tsize
);
7093 memset(newV
, 0, tsize
);
7094 for (i
= 0; i
< table
->size
; i
++)
7096 unsigned long newHash
= hash(parser
, table
->v
[i
]->name
);
7097 size_t j
= newHash
& newMask
;
7101 step
= PROBE_STEP(newHash
, newMask
, newPower
);
7102 j
< step
? (j
+= newSize
- step
) : (j
-= step
);
7104 newV
[j
] = table
->v
[i
];
7106 table
->mem
->free_fcn(table
->v
);
7108 table
->power
= newPower
;
7109 table
->size
= newSize
;
7112 while (table
->v
[i
]) {
7114 step
= PROBE_STEP(h
, newMask
, newPower
);
7115 i
< step
? (i
+= newSize
- step
) : (i
-= step
);
7119 table
->v
[i
] = table
->mem
->malloc_fcn(createSize
);
7122 memset(table
->v
[i
], 0, createSize
);
7123 table
->v
[i
]->name
= name
;
7128 static void FASTCALL
7129 hashTableClear(HASH_TABLE
*table
) {
7131 for (i
= 0; i
< table
->size
; i
++) {
7132 table
->mem
->free_fcn(table
->v
[i
]);
7138 static void FASTCALL
7139 hashTableDestroy(HASH_TABLE
*table
) {
7141 for (i
= 0; i
< table
->size
; i
++)
7142 table
->mem
->free_fcn(table
->v
[i
]);
7143 table
->mem
->free_fcn(table
->v
);
7146 static void FASTCALL
7147 hashTableInit(HASH_TABLE
*p
, const XML_Memory_Handling_Suite
*ms
) {
7155 static void FASTCALL
7156 hashTableIterInit(HASH_TABLE_ITER
*iter
, const HASH_TABLE
*table
) {
7158 iter
->end
= iter
->p
? iter
->p
+ table
->size
: NULL
;
7161 static NAMED
*FASTCALL
7162 hashTableIterNext(HASH_TABLE_ITER
*iter
) {
7163 while (iter
->p
!= iter
->end
) {
7164 NAMED
*tem
= *(iter
->p
)++;
7171 static void FASTCALL
7172 poolInit(STRING_POOL
*pool
, const XML_Memory_Handling_Suite
*ms
) {
7173 pool
->blocks
= NULL
;
7174 pool
->freeBlocks
= NULL
;
7181 static void FASTCALL
7182 poolClear(STRING_POOL
*pool
) {
7183 if (! pool
->freeBlocks
)
7184 pool
->freeBlocks
= pool
->blocks
;
7186 BLOCK
*p
= pool
->blocks
;
7188 BLOCK
*tem
= p
->next
;
7189 p
->next
= pool
->freeBlocks
;
7190 pool
->freeBlocks
= p
;
7194 pool
->blocks
= NULL
;
7200 static void FASTCALL
7201 poolDestroy(STRING_POOL
*pool
) {
7202 BLOCK
*p
= pool
->blocks
;
7204 BLOCK
*tem
= p
->next
;
7205 pool
->mem
->free_fcn(p
);
7208 p
= pool
->freeBlocks
;
7210 BLOCK
*tem
= p
->next
;
7211 pool
->mem
->free_fcn(p
);
7217 poolAppend(STRING_POOL
*pool
, const ENCODING
*enc
, const char *ptr
,
7219 if (! pool
->ptr
&& ! poolGrow(pool
))
7222 const enum XML_Convert_Result convert_res
= XmlConvert(
7223 enc
, &ptr
, end
, (ICHAR
**)&(pool
->ptr
), (ICHAR
*)pool
->end
);
7224 if ((convert_res
== XML_CONVERT_COMPLETED
)
7225 || (convert_res
== XML_CONVERT_INPUT_INCOMPLETE
))
7227 if (! poolGrow(pool
))
7233 static const XML_Char
*FASTCALL
7234 poolCopyString(STRING_POOL
*pool
, const XML_Char
*s
) {
7236 if (! poolAppendChar(pool
, *s
))
7244 static const XML_Char
*
7245 poolCopyStringN(STRING_POOL
*pool
, const XML_Char
*s
, int n
) {
7246 if (! pool
->ptr
&& ! poolGrow(pool
)) {
7247 /* The following line is unreachable given the current usage of
7248 * poolCopyStringN(). Currently it is called from exactly one
7249 * place to copy the text of a simple general entity. By that
7250 * point, the name of the entity is already stored in the pool, so
7251 * pool->ptr cannot be NULL.
7253 * If poolCopyStringN() is used elsewhere as it well might be,
7254 * this line may well become executable again. Regardless, this
7255 * sort of check shouldn't be removed lightly, so we just exclude
7256 * it from the coverage statistics.
7258 return NULL
; /* LCOV_EXCL_LINE */
7260 for (; n
> 0; --n
, s
++) {
7261 if (! poolAppendChar(pool
, *s
))
7269 static const XML_Char
*FASTCALL
7270 poolAppendString(STRING_POOL
*pool
, const XML_Char
*s
) {
7272 if (! poolAppendChar(pool
, *s
))
7280 poolStoreString(STRING_POOL
*pool
, const ENCODING
*enc
, const char *ptr
,
7282 if (! poolAppend(pool
, enc
, ptr
, end
))
7284 if (pool
->ptr
== pool
->end
&& ! poolGrow(pool
))
7291 poolBytesToAllocateFor(int blockSize
) {
7292 /* Unprotected math would be:
7293 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7295 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7296 ** For a + b * c we check b * c in isolation first, so that addition of a
7297 ** on top has no chance of making us accept a small non-negative number
7299 const size_t stretch
= sizeof(XML_Char
); /* can be 4 bytes */
7304 if (blockSize
> (int)(INT_MAX
/ stretch
))
7308 const int stretchedBlockSize
= blockSize
* (int)stretch
;
7309 const int bytesToAllocate
7310 = (int)(offsetof(BLOCK
, s
) + (unsigned)stretchedBlockSize
);
7311 if (bytesToAllocate
< 0)
7314 return (size_t)bytesToAllocate
;
7318 static XML_Bool FASTCALL
7319 poolGrow(STRING_POOL
*pool
) {
7320 if (pool
->freeBlocks
) {
7321 if (pool
->start
== 0) {
7322 pool
->blocks
= pool
->freeBlocks
;
7323 pool
->freeBlocks
= pool
->freeBlocks
->next
;
7324 pool
->blocks
->next
= NULL
;
7325 pool
->start
= pool
->blocks
->s
;
7326 pool
->end
= pool
->start
+ pool
->blocks
->size
;
7327 pool
->ptr
= pool
->start
;
7330 if (pool
->end
- pool
->start
< pool
->freeBlocks
->size
) {
7331 BLOCK
*tem
= pool
->freeBlocks
->next
;
7332 pool
->freeBlocks
->next
= pool
->blocks
;
7333 pool
->blocks
= pool
->freeBlocks
;
7334 pool
->freeBlocks
= tem
;
7335 memcpy(pool
->blocks
->s
, pool
->start
,
7336 (pool
->end
- pool
->start
) * sizeof(XML_Char
));
7337 pool
->ptr
= pool
->blocks
->s
+ (pool
->ptr
- pool
->start
);
7338 pool
->start
= pool
->blocks
->s
;
7339 pool
->end
= pool
->start
+ pool
->blocks
->size
;
7343 if (pool
->blocks
&& pool
->start
== pool
->blocks
->s
) {
7345 int blockSize
= (int)((unsigned)(pool
->end
- pool
->start
) * 2U);
7346 size_t bytesToAllocate
;
7348 /* NOTE: Needs to be calculated prior to calling `realloc`
7349 to avoid dangling pointers: */
7350 const ptrdiff_t offsetInsideBlock
= pool
->ptr
- pool
->start
;
7352 if (blockSize
< 0) {
7353 /* This condition traps a situation where either more than
7354 * INT_MAX/2 bytes have already been allocated. This isn't
7355 * readily testable, since it is unlikely that an average
7356 * machine will have that much memory, so we exclude it from the
7357 * coverage statistics.
7359 return XML_FALSE
; /* LCOV_EXCL_LINE */
7362 bytesToAllocate
= poolBytesToAllocateFor(blockSize
);
7363 if (bytesToAllocate
== 0)
7366 temp
= (BLOCK
*)pool
->mem
->realloc_fcn(pool
->blocks
,
7367 (unsigned)bytesToAllocate
);
7370 pool
->blocks
= temp
;
7371 pool
->blocks
->size
= blockSize
;
7372 pool
->ptr
= pool
->blocks
->s
+ offsetInsideBlock
;
7373 pool
->start
= pool
->blocks
->s
;
7374 pool
->end
= pool
->start
+ blockSize
;
7377 int blockSize
= (int)(pool
->end
- pool
->start
);
7378 size_t bytesToAllocate
;
7380 if (blockSize
< 0) {
7381 /* This condition traps a situation where either more than
7382 * INT_MAX bytes have already been allocated (which is prevented
7383 * by various pieces of program logic, not least this one, never
7384 * mind the unlikelihood of actually having that much memory) or
7385 * the pool control fields have been corrupted (which could
7386 * conceivably happen in an extremely buggy user handler
7387 * function). Either way it isn't readily testable, so we
7388 * exclude it from the coverage statistics.
7390 return XML_FALSE
; /* LCOV_EXCL_LINE */
7393 if (blockSize
< INIT_BLOCK_SIZE
)
7394 blockSize
= INIT_BLOCK_SIZE
;
7396 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7397 if ((int)((unsigned)blockSize
* 2U) < 0) {
7403 bytesToAllocate
= poolBytesToAllocateFor(blockSize
);
7404 if (bytesToAllocate
== 0)
7407 tem
= pool
->mem
->malloc_fcn(bytesToAllocate
);
7410 tem
->size
= blockSize
;
7411 tem
->next
= pool
->blocks
;
7413 if (pool
->ptr
!= pool
->start
)
7414 memcpy(tem
->s
, pool
->start
, (pool
->ptr
- pool
->start
) * sizeof(XML_Char
));
7415 pool
->ptr
= tem
->s
+ (pool
->ptr
- pool
->start
);
7416 pool
->start
= tem
->s
;
7417 pool
->end
= tem
->s
+ blockSize
;
7423 nextScaffoldPart(XML_Parser parser
) {
7424 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
7425 CONTENT_SCAFFOLD
*me
;
7428 if (! dtd
->scaffIndex
) {
7429 dtd
->scaffIndex
= (int *)MALLOC(parser
, parser
->m_groupSize
* sizeof(int));
7430 if (! dtd
->scaffIndex
)
7432 dtd
->scaffIndex
[0] = 0;
7435 if (dtd
->scaffCount
>= dtd
->scaffSize
) {
7436 CONTENT_SCAFFOLD
*temp
;
7437 if (dtd
->scaffold
) {
7438 /* Detect and prevent integer overflow */
7439 if (dtd
->scaffSize
> UINT_MAX
/ 2u) {
7442 /* Detect and prevent integer overflow.
7443 * The preprocessor guard addresses the "always false" warning
7444 * from -Wtype-limits on platforms where
7445 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7446 #if UINT_MAX >= SIZE_MAX
7447 if (dtd
->scaffSize
> (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD
)) {
7452 temp
= (CONTENT_SCAFFOLD
*)REALLOC(
7453 parser
, dtd
->scaffold
, dtd
->scaffSize
* 2 * sizeof(CONTENT_SCAFFOLD
));
7456 dtd
->scaffSize
*= 2;
7458 temp
= (CONTENT_SCAFFOLD
*)MALLOC(parser
, INIT_SCAFFOLD_ELEMENTS
7459 * sizeof(CONTENT_SCAFFOLD
));
7462 dtd
->scaffSize
= INIT_SCAFFOLD_ELEMENTS
;
7464 dtd
->scaffold
= temp
;
7466 next
= dtd
->scaffCount
++;
7467 me
= &dtd
->scaffold
[next
];
7468 if (dtd
->scaffLevel
) {
7469 CONTENT_SCAFFOLD
*parent
7470 = &dtd
->scaffold
[dtd
->scaffIndex
[dtd
->scaffLevel
- 1]];
7471 if (parent
->lastchild
) {
7472 dtd
->scaffold
[parent
->lastchild
].nextsib
= next
;
7474 if (! parent
->childcnt
)
7475 parent
->firstchild
= next
;
7476 parent
->lastchild
= next
;
7479 me
->firstchild
= me
->lastchild
= me
->childcnt
= me
->nextsib
= 0;
7483 static XML_Content
*
7484 build_model(XML_Parser parser
) {
7485 /* Function build_model transforms the existing parser->m_dtd->scaffold
7486 * array of CONTENT_SCAFFOLD tree nodes into a new array of
7487 * XML_Content tree nodes followed by a gapless list of zero-terminated
7489 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
7491 XML_Char
*str
; /* the current string writing location */
7493 /* Detect and prevent integer overflow.
7494 * The preprocessor guard addresses the "always false" warning
7495 * from -Wtype-limits on platforms where
7496 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7497 #if UINT_MAX >= SIZE_MAX
7498 if (dtd
->scaffCount
> (size_t)(-1) / sizeof(XML_Content
)) {
7501 if (dtd
->contentStringLen
> (size_t)(-1) / sizeof(XML_Char
)) {
7505 if (dtd
->scaffCount
* sizeof(XML_Content
)
7506 > (size_t)(-1) - dtd
->contentStringLen
* sizeof(XML_Char
)) {
7510 const size_t allocsize
= (dtd
->scaffCount
* sizeof(XML_Content
)
7511 + (dtd
->contentStringLen
* sizeof(XML_Char
)));
7513 ret
= (XML_Content
*)MALLOC(parser
, allocsize
);
7517 /* What follows is an iterative implementation (of what was previously done
7518 * recursively in a dedicated function called "build_node". The old recursive
7519 * build_node could be forced into stack exhaustion from input as small as a
7520 * few megabyte, and so that was a security issue. Hence, a function call
7521 * stack is avoided now by resolving recursion.)
7523 * The iterative approach works as follows:
7525 * - We have two writing pointers, both walking up the result array; one does
7526 * the work, the other creates "jobs" for its colleague to do, and leads
7529 * - The faster one, pointer jobDest, always leads and writes "what job
7530 * to do" by the other, once they reach that place in the
7531 * array: leader "jobDest" stores the source node array index (relative
7532 * to array dtd->scaffold) in field "numchildren".
7534 * - The slower one, pointer dest, looks at the value stored in the
7535 * "numchildren" field (which actually holds a source node array index
7536 * at that time) and puts the real data from dtd->scaffold in.
7538 * - Before the loop starts, jobDest writes source array index 0
7539 * (where the root node is located) so that dest will have something to do
7540 * when it starts operation.
7542 * - Whenever nodes with children are encountered, jobDest appends
7543 * them as new jobs, in order. As a result, tree node siblings are
7544 * adjacent in the resulting array, for example:
7546 * [0] root, has two children
7547 * [1] first child of 0, has three children
7548 * [3] first child of 1, does not have children
7549 * [4] second child of 1, does not have children
7550 * [5] third child of 1, does not have children
7551 * [2] second child of 0, does not have children
7553 * Or (the same data) presented in flat array view:
7555 * [0] root, has two children
7557 * [1] first child of 0, has three children
7558 * [2] second child of 0, does not have children
7560 * [3] first child of 1, does not have children
7561 * [4] second child of 1, does not have children
7562 * [5] third child of 1, does not have children
7564 * - The algorithm repeats until all target array indices have been processed.
7566 XML_Content
*dest
= ret
; /* tree node writing location, moves upwards */
7567 XML_Content
*const destLimit
= &ret
[dtd
->scaffCount
];
7568 XML_Content
*jobDest
= ret
; /* next free writing location in target array */
7569 str
= (XML_Char
*)&ret
[dtd
->scaffCount
];
7571 /* Add the starting job, the root node (index 0) of the source tree */
7572 (jobDest
++)->numchildren
= 0;
7574 for (; dest
< destLimit
; dest
++) {
7575 /* Retrieve source tree array index from job storage */
7576 const int src_node
= (int)dest
->numchildren
;
7579 dest
->type
= dtd
->scaffold
[src_node
].type
;
7580 dest
->quant
= dtd
->scaffold
[src_node
].quant
;
7581 if (dest
->type
== XML_CTYPE_NAME
) {
7582 const XML_Char
*src
;
7584 src
= dtd
->scaffold
[src_node
].name
;
7591 dest
->numchildren
= 0;
7592 dest
->children
= NULL
;
7597 dest
->numchildren
= dtd
->scaffold
[src_node
].childcnt
;
7598 dest
->children
= jobDest
;
7600 /* Append scaffold indices of children to array */
7601 for (i
= 0, cn
= dtd
->scaffold
[src_node
].firstchild
;
7602 i
< dest
->numchildren
; i
++, cn
= dtd
->scaffold
[cn
].nextsib
)
7603 (jobDest
++)->numchildren
= (unsigned int)cn
;
7610 static ELEMENT_TYPE
*
7611 getElementType(XML_Parser parser
, const ENCODING
*enc
, const char *ptr
,
7613 DTD
*const dtd
= parser
->m_dtd
; /* save one level of indirection */
7614 const XML_Char
*name
= poolStoreString(&dtd
->pool
, enc
, ptr
, end
);
7619 ret
= (ELEMENT_TYPE
*)lookup(parser
, &dtd
->elementTypes
, name
,
7620 sizeof(ELEMENT_TYPE
));
7623 if (ret
->name
!= name
)
7624 poolDiscard(&dtd
->pool
);
7626 poolFinish(&dtd
->pool
);
7627 if (! setElementTypePrefix(parser
, ret
))
7634 copyString(const XML_Char
*s
, const XML_Memory_Handling_Suite
*memsuite
) {
7635 size_t charsRequired
= 0;
7638 /* First determine how long the string is */
7639 while (s
[charsRequired
] != 0) {
7642 /* Include the terminator */
7645 /* Now allocate space for the copy */
7646 result
= memsuite
->malloc_fcn(charsRequired
* sizeof(XML_Char
));
7649 /* Copy the original into place */
7650 memcpy(result
, s
, charsRequired
* sizeof(XML_Char
));
7657 accountingGetCurrentAmplification(XML_Parser rootParser
) {
7658 const XmlBigCount countBytesOutput
7659 = rootParser
->m_accounting
.countBytesDirect
7660 + rootParser
->m_accounting
.countBytesIndirect
;
7661 const float amplificationFactor
7662 = rootParser
->m_accounting
.countBytesDirect
7664 / (float)(rootParser
->m_accounting
.countBytesDirect
))
7666 assert(! rootParser
->m_parentParser
);
7667 return amplificationFactor
;
7671 accountingReportStats(XML_Parser originParser
, const char *epilog
) {
7672 const XML_Parser rootParser
= getRootParserOf(originParser
, NULL
);
7673 assert(! rootParser
->m_parentParser
);
7675 if (rootParser
->m_accounting
.debugLevel
< 1) {
7679 const float amplificationFactor
7680 = accountingGetCurrentAmplification(rootParser
);
7682 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7683 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7684 (void *)rootParser
, rootParser
->m_accounting
.countBytesDirect
,
7685 rootParser
->m_accounting
.countBytesIndirect
,
7686 (double)amplificationFactor
, epilog
);
7690 accountingOnAbort(XML_Parser originParser
) {
7691 accountingReportStats(originParser
, " ABORTING\n");
7695 accountingReportDiff(XML_Parser rootParser
,
7696 unsigned int levelsAwayFromRootParser
, const char *before
,
7697 const char *after
, ptrdiff_t bytesMore
, int source_line
,
7698 enum XML_Account account
) {
7699 assert(! rootParser
->m_parentParser
);
7702 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7703 bytesMore
, (account
== XML_ACCOUNT_DIRECT
) ? "DIR" : "EXP",
7704 levelsAwayFromRootParser
, source_line
, 10, "");
7706 const char ellipis
[] = "[..]";
7707 const size_t ellipsisLength
= sizeof(ellipis
) /* because compile-time */ - 1;
7708 const unsigned int contextLength
= 10;
7710 /* Note: Performance is of no concern here */
7711 const char *walker
= before
;
7712 if ((rootParser
->m_accounting
.debugLevel
>= 3)
7714 <= (ptrdiff_t)(contextLength
+ ellipsisLength
+ contextLength
)) {
7715 for (; walker
< after
; walker
++) {
7716 fprintf(stderr
, "%s", unsignedCharToPrintable(walker
[0]));
7719 for (; walker
< before
+ contextLength
; walker
++) {
7720 fprintf(stderr
, "%s", unsignedCharToPrintable(walker
[0]));
7722 fprintf(stderr
, ellipis
);
7723 walker
= after
- contextLength
;
7724 for (; walker
< after
; walker
++) {
7725 fprintf(stderr
, "%s", unsignedCharToPrintable(walker
[0]));
7728 fprintf(stderr
, "\"\n");
7732 accountingDiffTolerated(XML_Parser originParser
, int tok
, const char *before
,
7733 const char *after
, int source_line
,
7734 enum XML_Account account
) {
7735 /* Note: We need to check the token type *first* to be sure that
7736 * we can even access variable <after>, safely.
7737 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7739 case XML_TOK_INVALID
:
7740 case XML_TOK_PARTIAL
:
7741 case XML_TOK_PARTIAL_CHAR
:
7746 if (account
== XML_ACCOUNT_NONE
)
7747 return XML_TRUE
; /* because these bytes have been accounted for, already */
7749 unsigned int levelsAwayFromRootParser
;
7750 const XML_Parser rootParser
7751 = getRootParserOf(originParser
, &levelsAwayFromRootParser
);
7752 assert(! rootParser
->m_parentParser
);
7755 = (account
== XML_ACCOUNT_DIRECT
) && (originParser
== rootParser
);
7756 const ptrdiff_t bytesMore
= after
- before
;
7758 XmlBigCount
*const additionTarget
7759 = isDirect
? &rootParser
->m_accounting
.countBytesDirect
7760 : &rootParser
->m_accounting
.countBytesIndirect
;
7762 /* Detect and avoid integer overflow */
7763 if (*additionTarget
> (XmlBigCount
)(-1) - (XmlBigCount
)bytesMore
)
7765 *additionTarget
+= bytesMore
;
7767 const XmlBigCount countBytesOutput
7768 = rootParser
->m_accounting
.countBytesDirect
7769 + rootParser
->m_accounting
.countBytesIndirect
;
7770 const float amplificationFactor
7771 = accountingGetCurrentAmplification(rootParser
);
7772 const XML_Bool tolerated
7773 = (countBytesOutput
< rootParser
->m_accounting
.activationThresholdBytes
)
7774 || (amplificationFactor
7775 <= rootParser
->m_accounting
.maximumAmplificationFactor
);
7777 if (rootParser
->m_accounting
.debugLevel
>= 2) {
7778 accountingReportStats(rootParser
, "");
7779 accountingReportDiff(rootParser
, levelsAwayFromRootParser
, before
, after
,
7780 bytesMore
, source_line
, account
);
7787 testingAccountingGetCountBytesDirect(XML_Parser parser
) {
7790 return parser
->m_accounting
.countBytesDirect
;
7794 testingAccountingGetCountBytesIndirect(XML_Parser parser
) {
7797 return parser
->m_accounting
.countBytesIndirect
;
7801 entityTrackingReportStats(XML_Parser rootParser
, ENTITY
*entity
,
7802 const char *action
, int sourceLine
) {
7803 assert(! rootParser
->m_parentParser
);
7804 if (rootParser
->m_entity_stats
.debugLevel
< 1)
7807 # if defined(XML_UNICODE)
7808 const char *const entityName
= "[..]";
7810 const char *const entityName
= entity
->name
;
7815 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7816 (void *)rootParser
, rootParser
->m_entity_stats
.countEverOpened
,
7817 rootParser
->m_entity_stats
.currentDepth
,
7818 rootParser
->m_entity_stats
.maximumDepthSeen
,
7819 (rootParser
->m_entity_stats
.currentDepth
- 1) * 2, "",
7820 entity
->is_param
? "%" : "&", entityName
, action
, entity
->textLen
,
7825 entityTrackingOnOpen(XML_Parser originParser
, ENTITY
*entity
, int sourceLine
) {
7826 const XML_Parser rootParser
= getRootParserOf(originParser
, NULL
);
7827 assert(! rootParser
->m_parentParser
);
7829 rootParser
->m_entity_stats
.countEverOpened
++;
7830 rootParser
->m_entity_stats
.currentDepth
++;
7831 if (rootParser
->m_entity_stats
.currentDepth
7832 > rootParser
->m_entity_stats
.maximumDepthSeen
) {
7833 rootParser
->m_entity_stats
.maximumDepthSeen
++;
7836 entityTrackingReportStats(rootParser
, entity
, "OPEN ", sourceLine
);
7840 entityTrackingOnClose(XML_Parser originParser
, ENTITY
*entity
, int sourceLine
) {
7841 const XML_Parser rootParser
= getRootParserOf(originParser
, NULL
);
7842 assert(! rootParser
->m_parentParser
);
7844 entityTrackingReportStats(rootParser
, entity
, "CLOSE", sourceLine
);
7845 rootParser
->m_entity_stats
.currentDepth
--;
7849 getRootParserOf(XML_Parser parser
, unsigned int *outLevelDiff
) {
7850 XML_Parser rootParser
= parser
;
7851 unsigned int stepsTakenUpwards
= 0;
7852 while (rootParser
->m_parentParser
) {
7853 rootParser
= rootParser
->m_parentParser
;
7854 stepsTakenUpwards
++;
7856 assert(! rootParser
->m_parentParser
);
7857 if (outLevelDiff
!= NULL
) {
7858 *outLevelDiff
= stepsTakenUpwards
;
7864 unsignedCharToPrintable(unsigned char c
) {
8379 assert(0); /* never gets here */
8382 assert(0); /* never gets here */
8385 #endif /* XML_DTD */
8387 static unsigned long
8388 getDebugLevel(const char *variableName
, unsigned long defaultDebugLevel
) {
8389 const char *const valueOrNull
= getenv(variableName
);
8390 if (valueOrNull
== NULL
) {
8391 return defaultDebugLevel
;
8393 const char *const value
= valueOrNull
;
8396 char *afterValue
= (char *)value
;
8397 unsigned long debugLevel
= strtoul(value
, &afterValue
, 10);
8398 if ((errno
!= 0) || (afterValue
[0] != '\0')) {
8400 return defaultDebugLevel
;