usr.sbin/makefs/hammer2: Sync with recent userspace ondisk.c changes
[dragonfly.git] / contrib / expat / lib / xmlparse.c
blobb6c2eca97567baa588b1c5df04f4ebabd15e623e
1 /* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh@google.com>
39 Licensed under the MIT license:
41 Permission is hereby granted, free of charge, to any person obtaining
42 a copy of this software and associated documentation files (the
43 "Software"), to deal in the Software without restriction, including
44 without limitation the rights to use, copy, modify, merge, publish,
45 distribute, sublicense, and/or sell copies of the Software, and to permit
46 persons to whom the Software is furnished to do so, subject to the
47 following conditions:
49 The above copyright notice and this permission notice shall be included
50 in all copies or substantial portions of the Software.
52 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
53 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
54 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
55 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
56 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
57 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
58 USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
61 #define XML_BUILDING_EXPAT 1
63 #include <expat_config.h>
65 #if ! defined(_GNU_SOURCE)
66 # define _GNU_SOURCE 1 /* syscall prototype */
67 #endif
69 #ifdef _WIN32
70 /* force stdlib to define rand_s() */
71 # if ! defined(_CRT_RAND_S)
72 # define _CRT_RAND_S
73 # endif
74 #endif
76 #include <stddef.h>
77 #include <string.h> /* memset(), memcpy() */
78 #include <assert.h>
79 #include <limits.h> /* UINT_MAX */
80 #include <stdio.h> /* fprintf */
81 #include <stdlib.h> /* getenv, rand_s */
82 #include <stdint.h> /* uintptr_t */
83 #include <math.h> /* isnan */
85 #ifdef _WIN32
86 # define getpid GetCurrentProcessId
87 #else
88 # include <sys/time.h> /* gettimeofday() */
89 # include <sys/types.h> /* getpid() */
90 # include <unistd.h> /* getpid() */
91 # include <fcntl.h> /* O_RDONLY */
92 # include <errno.h>
93 #endif
95 #ifdef _WIN32
96 # include "winconfig.h"
97 #endif
99 #include "ascii.h"
100 #include "expat.h"
101 #include "siphash.h"
103 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
104 # if defined(HAVE_GETRANDOM)
105 # include <sys/random.h> /* getrandom */
106 # else
107 # include <unistd.h> /* syscall */
108 # include <sys/syscall.h> /* SYS_getrandom */
109 # endif
110 # if ! defined(GRND_NONBLOCK)
111 # define GRND_NONBLOCK 0x0001
112 # endif /* defined(GRND_NONBLOCK) */
113 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
115 #if defined(HAVE_LIBBSD) \
116 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
117 # include <bsd/stdlib.h>
118 #endif
120 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
121 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
122 #endif
124 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
125 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
126 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
127 && ! defined(XML_POOR_ENTROPY)
128 # error You do not have support for any sources of high quality entropy \
129 enabled. For end user security, that is probably not what you want. \
131 Your options include: \
132 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
133 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
134 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
135 * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
136 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
137 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
138 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
139 * Windows >=Vista (rand_s): _WIN32. \
141 If insist on not using any of these, bypass this error by defining \
142 XML_POOR_ENTROPY; you have been warned. \
144 If you have reasons to patch this detection code away or need changes \
145 to the build system, please open a bug. Thank you!
146 #endif
148 #ifdef XML_UNICODE
149 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
150 # define XmlConvert XmlUtf16Convert
151 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
152 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
153 # define XmlEncode XmlUtf16Encode
154 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
155 typedef unsigned short ICHAR;
156 #else
157 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
158 # define XmlConvert XmlUtf8Convert
159 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
160 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
161 # define XmlEncode XmlUtf8Encode
162 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
163 typedef char ICHAR;
164 #endif
166 #ifndef XML_NS
168 # define XmlInitEncodingNS XmlInitEncoding
169 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
170 # undef XmlGetInternalEncodingNS
171 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
172 # define XmlParseXmlDeclNS XmlParseXmlDecl
174 #endif
176 #ifdef XML_UNICODE
178 # ifdef XML_UNICODE_WCHAR_T
179 # define XML_T(x) (const wchar_t) x
180 # define XML_L(x) L##x
181 # else
182 # define XML_T(x) (const unsigned short)x
183 # define XML_L(x) x
184 # endif
186 #else
188 # define XML_T(x) x
189 # define XML_L(x) x
191 #endif
193 /* Round up n to be a multiple of sz, where sz is a power of 2.
   sz is expanded twice, so pass an expression without side effects. */
194 #define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))
196 /* Do safe (NULL-aware) pointer arithmetic: evaluates to (p - q) when both
   pointers are non-NULL and to 0 otherwise.  p and q are each expanded
   twice. */
197 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
199 #include "internal.h"
200 #include "xmltok.h"
201 #include "xmlrole.h"
/* Key type of the hash tables: a pointer to XML_Char data. */
203 typedef const XML_Char *KEY;
/* Minimal hash-table entry; lookup() (declared further down in this file)
   returns a pointer to this type.  Its only member is the key. */
205 typedef struct {
206 KEY name;
207 } NAMED;
/* Hash table of NAMED pointers.
   v     - array of entry pointers
   power - NOTE(review): presumably log2 of size, as the probing comment in
           this file states that the table size is a power of 2 -- confirm
   size  - table size (the probing comment uses mask = table->size - 1)
   used  - NOTE(review): presumably the number of occupied slots -- confirm
   mem   - memory handling suite associated with this table */
209 typedef struct {
210 NAMED **v;
211 unsigned char power;
212 size_t size;
213 size_t used;
214 const XML_Memory_Handling_Suite *mem;
215 } HASH_TABLE;
217 static size_t keylen(KEY s);
219 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
/* For probing (after a collision) we need a step size relative prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relative prime to a power of 2.

   SECOND_HASH yields the secondary hash taken from the masked-out bits;
   PROBE_STEP forces it odd and narrows it to unsigned char.
   Both macros expand their arguments more than once.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
/* Iteration state over a HASH_TABLE; used with hashTableIterInit() and
   hashTableIterNext(), both declared further down in this file.
   NOTE(review): p is presumably the current slot and end the one-past-last
   slot -- confirm against the iterator functions. */
234 typedef struct {
235 NAMED **p;
236 NAMED **end;
237 } HASH_TABLE_ITER;
239 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
240 #define INIT_DATA_BUF_SIZE 1024
241 #define INIT_ATTS_SIZE 16
242 #define INIT_ATTS_VERSION 0xFFFFFFFF
243 #define INIT_BLOCK_SIZE 1024
244 #define INIT_BUFFER_SIZE 1024
246 #define EXPAND_SPARE 24
/* Associates a PREFIX with a URI.
   Carries two list links (nextTagBinding, prevPrefixBinding), the
   attribute id the binding refers to, and the URI buffer with its
   bookkeeping (uriLen, uriAlloc).
   NOTE(review): uriLen/uriAlloc are presumably counted in XML_Chars --
   confirm in addBinding(). */
248 typedef struct binding {
249 struct prefix *prefix;
250 struct binding *nextTagBinding;
251 struct binding *prevPrefixBinding;
252 const struct attribute_id *attId;
253 XML_Char *uri;
254 int uriLen;
255 int uriAlloc;
256 } BINDING;
/* A named prefix plus a pointer to a BINDING.
   NOTE(review): binding is presumably the currently active binding for
   this prefix (NULL when unbound) -- confirm. */
258 typedef struct prefix {
259 const XML_Char *name;
260 BINDING *binding;
261 } PREFIX;
/* Element name as stored in a TAG ("tagName in the API encoding"):
   the full string plus pointers to its local part and prefix, together
   with three length fields.
   NOTE(review): the unit of the length fields (XML_Chars vs. bytes) is not
   visible here -- confirm in storeAtts(). */
263 typedef struct {
264 const XML_Char *str;
265 const XML_Char *localPart;
266 const XML_Char *prefix;
267 int strLen;
268 int uriLen;
269 int prefixLen;
270 } TAG_NAME;
272 /* TAG represents an open element.
273 The name of the element is stored in both the document and API
274 encodings. The memory buffer 'buf' is a separately-allocated
275 memory area which stores the name. During the XML_Parse()/
276 XMLParseBuffer() when the element is open, the memory for the 'raw'
277 version of the name (in the document encoding) is shared with the
278 document buffer. If the element is open across calls to
279 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
280 contain the 'raw' name as well.
282 A parser re-uses these structures, maintaining a list of allocated
283 TAG objects in a free list.
285 typedef struct tag {
286 struct tag *parent; /* parent of this element */
287 const char *rawName; /* tagName in the original encoding */
288 int rawNameLength;
289 TAG_NAME name; /* tagName in the API encoding */
290 char *buf; /* buffer for name components */
291 char *bufEnd; /* end of the buffer */
292 BINDING *bindings;
293 } TAG;
/* Record describing one entity: its name, replacement text (textPtr /
   textLen), external identifiers (systemId, base, publicId), notation,
   and state flags. */
295 typedef struct {
296 const XML_Char *name;
297 const XML_Char *textPtr;
298 int textLen; /* length in XML_Chars */
299 int processed; /* # of processed bytes - when suspended */
300 const XML_Char *systemId;
301 const XML_Char *base;
302 const XML_Char *publicId;
303 const XML_Char *notation;
304 XML_Bool open;
305 XML_Bool is_param;
306 XML_Bool is_internal; /* true if declared in internal subset outside PE */
307 } ENTITY;
/* One node of the scaffold used for building a content model (the DTD
   struct holds an array of these in its 'scaffold' member).
   NOTE(review): firstchild / lastchild / nextsib are ints, presumably
   indices into that array rather than pointers -- confirm in
   nextScaffoldPart(). */
309 typedef struct {
310 enum XML_Content_Type type;
311 enum XML_Content_Quant quant;
312 const XML_Char *name;
313 int firstchild;
314 int lastchild;
315 int childcnt;
316 int nextsib;
317 } CONTENT_SCAFFOLD;
319 #define INIT_SCAFFOLD_ELEMENTS 32
/* One storage block of a STRING_POOL, chained through 'next'.
   NOTE(review): s[1] looks like the first element of a larger trailing
   character array with 'size' giving its capacity -- confirm in
   poolGrow(). */
321 typedef struct block {
322 struct block *next;
323 int size;
324 XML_Char s[1];
325 } BLOCK;
/* String pool built from BLOCKs.
   blocks / freeBlocks - two BLOCK lists owned by the pool
   start - start of the string under construction (poolStart)
   ptr   - current write position (poolEnd); poolLength is ptr - start
   end   - limit of the current storage; poolAppendChar calls poolGrow()
           when ptr reaches it
   mem   - memory handling suite associated with this pool */
327 typedef struct {
328 BLOCK *blocks;
329 BLOCK *freeBlocks;
330 const XML_Char *end;
331 XML_Char *ptr;
332 XML_Char *start;
333 const XML_Memory_Handling_Suite *mem;
334 } STRING_POOL;
336 /* The XML_Char before the name is used to determine whether
337 an attribute has been specified.
   Besides the name, the record holds the attribute's PREFIX and two
   flags (maybeTokenized, xmlns). */
338 typedef struct attribute_id {
339 XML_Char *name;
340 PREFIX *prefix;
341 XML_Bool maybeTokenized;
342 XML_Bool xmlns;
343 } ATTRIBUTE_ID;
/* Default value for an attribute: the attribute id, a CDATA flag and the
   value string.  ELEMENT_TYPE keeps an array of these (defaultAtts). */
345 typedef struct {
346 const ATTRIBUTE_ID *id;
347 XML_Bool isCdata;
348 const XML_Char *value;
349 } DEFAULT_ATTRIBUTE;
/* Entry of the parser's m_nsAtts table: a version stamp, a hash value and
   a uriName string.
   NOTE(review): 'version' is presumably compared with m_nsAttsVersion to
   tell stale entries from current ones -- confirm in storeAtts(). */
351 typedef struct {
352 unsigned long version;
353 unsigned long hash;
354 const XML_Char *uriName;
355 } NS_ATT;
/* Description of an element type: name, prefix, the attribute id stored
   in idAtt, and an array of default attributes.
   nDefaultAtts / allocDefaultAtts accompany the defaultAtts array
   (NOTE(review): presumably used vs. allocated entry counts -- confirm in
   defineAttribute()). */
357 typedef struct {
358 const XML_Char *name;
359 PREFIX *prefix;
360 const ATTRIBUTE_ID *idAtt;
361 int nDefaultAtts;
362 int allocDefaultAtts;
363 DEFAULT_ATTRIBUTE *defaultAtts;
364 } ELEMENT_TYPE;
/* Per-document DTD state: hash tables for general entities, element
   types, attribute ids and prefixes (plus parameter entities when XML_DTD
   is defined), two string pools, a few status flags, the default prefix,
   and the scaffolding used while building a content model. */
366 typedef struct {
367 HASH_TABLE generalEntities;
368 HASH_TABLE elementTypes;
369 HASH_TABLE attributeIds;
370 HASH_TABLE prefixes;
371 STRING_POOL pool;
372 STRING_POOL entityValuePool;
373 /* false once a parameter entity reference has been skipped */
374 XML_Bool keepProcessing;
375 /* true once an internal or external PE reference has been encountered;
376 this includes the reference to an external subset */
377 XML_Bool hasParamEntityRefs;
378 XML_Bool standalone;
379 #ifdef XML_DTD
380 /* indicates if external PE has been read */
381 XML_Bool paramEntityRead;
382 HASH_TABLE paramEntities;
383 #endif /* XML_DTD */
384 PREFIX defaultPrefix;
385 /* === scaffolding for building content model === */
386 XML_Bool in_eldecl;
387 CONTENT_SCAFFOLD *scaffold;
388 unsigned contentStringLen;
389 unsigned scaffSize;
390 unsigned scaffCount;
391 int scaffLevel;
392 int *scaffIndex;
393 } DTD;
/* List node for an internal entity that is currently open.  The parser
   keeps two lists of these (m_openInternalEntities and
   m_freeInternalEntities), linked through 'next'. */
395 typedef struct open_internal_entity {
396 const char *internalEventPtr;
397 const char *internalEventEndPtr;
398 struct open_internal_entity *next;
399 ENTITY *entity;
400 int startTagLevel;
401 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
402 } OPEN_INTERNAL_ENTITY;
/* Selects how the bytes handled by a processing step are accounted for;
   passed as the 'account' argument of doProlog(), doContent() and the
   other helpers declared in this file. */
enum XML_Account {
  XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
                                   expansion */
  XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
};
411 #ifdef XML_DTD
/* Counter type used by the accounting fields. */
412 typedef unsigned long long XmlBigCount;
/* Byte accounting state (only compiled with XML_DTD): separate counters
   for direct and indirect bytes, a debug level, and the two limits
   maximumAmplificationFactor and activationThresholdBytes. */
413 typedef struct accounting {
414 XmlBigCount countBytesDirect;
415 XmlBigCount countBytesIndirect;
416 int debugLevel;
417 float maximumAmplificationFactor; // >=1.0
418 unsigned long long activationThresholdBytes;
419 } ACCOUNTING;
/* Entity tracking statistics (only compiled with XML_DTD): number of
   entities ever opened, current and maximum depth, and a debug level.
   See the entityTracking*() helpers declared further down. */
421 typedef struct entity_stats {
422 unsigned int countEverOpened;
423 unsigned int currentDepth;
424 unsigned int maximumDepthSeen;
425 int debugLevel;
426 } ENTITY_STATS;
427 #endif /* XML_DTD */
/* Function type shared by all processors declared below (prologProcessor,
   contentProcessor, ...); the parser stores a pointer to the active one in
   m_processor.  A processor receives the parser plus three pointer
   arguments (start, end, endPtr) and returns an XML_Error code.
   NOTE(review): endPtr presumably reports where processing stopped --
   confirm against a processor implementation. */
429 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
430 const char *end, const char **endPtr);
432 static Processor prologProcessor;
433 static Processor prologInitProcessor;
434 static Processor contentProcessor;
435 static Processor cdataSectionProcessor;
436 #ifdef XML_DTD
437 static Processor ignoreSectionProcessor;
438 static Processor externalParEntProcessor;
439 static Processor externalParEntInitProcessor;
440 static Processor entityValueProcessor;
441 static Processor entityValueInitProcessor;
442 #endif /* XML_DTD */
443 static Processor epilogProcessor;
444 static Processor errorProcessor;
445 static Processor externalEntityInitProcessor;
446 static Processor externalEntityInitProcessor2;
447 static Processor externalEntityInitProcessor3;
448 static Processor externalEntityContentProcessor;
449 static Processor internalEntityProcessor;
451 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
452 const XML_Char *encodingName);
453 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
454 const char *s, const char *next);
455 static enum XML_Error initializeEncoding(XML_Parser parser);
456 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
457 const char *s, const char *end, int tok,
458 const char *next, const char **nextPtr,
459 XML_Bool haveMore, XML_Bool allowClosingDoctype,
460 enum XML_Account account);
461 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
462 XML_Bool betweenDecl);
463 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
464 const ENCODING *enc, const char *start,
465 const char *end, const char **endPtr,
466 XML_Bool haveMore, enum XML_Account account);
467 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
468 const char **startPtr, const char *end,
469 const char **nextPtr, XML_Bool haveMore,
470 enum XML_Account account);
471 #ifdef XML_DTD
472 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
473 const char **startPtr, const char *end,
474 const char **nextPtr, XML_Bool haveMore);
475 #endif /* XML_DTD */
477 static void freeBindings(XML_Parser parser, BINDING *bindings);
478 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
479 const char *s, TAG_NAME *tagNamePtr,
480 BINDING **bindingsPtr,
481 enum XML_Account account);
482 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
483 const ATTRIBUTE_ID *attId, const XML_Char *uri,
484 BINDING **bindingsPtr);
485 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
486 XML_Bool isId, const XML_Char *dfltValue,
487 XML_Parser parser);
488 static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
489 XML_Bool isCdata, const char *,
490 const char *, STRING_POOL *,
491 enum XML_Account account);
492 static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
493 XML_Bool isCdata, const char *,
494 const char *, STRING_POOL *,
495 enum XML_Account account);
496 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
497 const char *start, const char *end);
498 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
499 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
500 const char *start, const char *end,
501 enum XML_Account account);
502 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
503 const char *start, const char *end);
504 static int reportComment(XML_Parser parser, const ENCODING *enc,
505 const char *start, const char *end);
506 static void reportDefault(XML_Parser parser, const ENCODING *enc,
507 const char *start, const char *end);
509 static const XML_Char *getContext(XML_Parser parser);
510 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
512 static void FASTCALL normalizePublicId(XML_Char *s);
514 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
515 /* do not call if m_parentParser != NULL */
516 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
517 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
518 const XML_Memory_Handling_Suite *ms);
519 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
520 const XML_Memory_Handling_Suite *ms);
521 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
522 const HASH_TABLE *);
523 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
524 size_t createSize);
525 static void FASTCALL hashTableInit(HASH_TABLE *,
526 const XML_Memory_Handling_Suite *ms);
527 static void FASTCALL hashTableClear(HASH_TABLE *);
528 static void FASTCALL hashTableDestroy(HASH_TABLE *);
529 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
530 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
532 static void FASTCALL poolInit(STRING_POOL *,
533 const XML_Memory_Handling_Suite *ms);
534 static void FASTCALL poolClear(STRING_POOL *);
535 static void FASTCALL poolDestroy(STRING_POOL *);
536 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
537 const char *ptr, const char *end);
538 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
539 const char *ptr, const char *end);
540 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
541 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
542 const XML_Char *s);
543 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
544 int n);
545 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
546 const XML_Char *s);
548 static int FASTCALL nextScaffoldPart(XML_Parser parser);
549 static XML_Content *build_model(XML_Parser parser);
550 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
551 const char *ptr, const char *end);
553 static XML_Char *copyString(const XML_Char *s,
554 const XML_Memory_Handling_Suite *memsuite);
556 static unsigned long generate_hash_secret_salt(XML_Parser parser);
557 static XML_Bool startParsing(XML_Parser parser);
559 static XML_Parser parserCreate(const XML_Char *encodingName,
560 const XML_Memory_Handling_Suite *memsuite,
561 const XML_Char *nameSep, DTD *dtd);
563 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
565 #ifdef XML_DTD
566 static float accountingGetCurrentAmplification(XML_Parser rootParser);
567 static void accountingReportStats(XML_Parser originParser, const char *epilog);
568 static void accountingOnAbort(XML_Parser originParser);
569 static void accountingReportDiff(XML_Parser rootParser,
570 unsigned int levelsAwayFromRootParser,
571 const char *before, const char *after,
572 ptrdiff_t bytesMore, int source_line,
573 enum XML_Account account);
574 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
575 const char *before, const char *after,
576 int source_line,
577 enum XML_Account account);
579 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
580 const char *action, int sourceLine);
581 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
582 int sourceLine);
583 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
584 int sourceLine);
586 static XML_Parser getRootParserOf(XML_Parser parser,
587 unsigned int *outLevelDiff);
588 #endif /* XML_DTD */
590 static unsigned long getDebugLevel(const char *variableName,
591 unsigned long defaultDebugLevel);
/* Accessor macros for a STRING_POOL (pool is a pointer to one).

   poolStart    - start of the string under construction
   poolEnd      - current write position
   poolLength   - number of XML_Chars between start and write position
   poolChop     - drop the last character (moves the write position back)
   poolLastChar - last character written
   poolDiscard  - abandon the string under construction
   poolFinish   - make the write position the new start
   poolAppendChar - append c, calling poolGrow() first when the write
                    position has reached the pool's end; evaluates to 0 if
                    growing failed and to 1 otherwise

   Every use of a macro parameter is parenthesized so that arguments such
   as "&pools[i]" expand correctly.  The pool argument is expanded more
   than once, so it must not have side effects. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
605 struct XML_ParserStruct {
606 /* The first member must be m_userData so that the XML_GetUserData
607 macro works. */
608 void *m_userData;
609 void *m_handlerArg;
610 char *m_buffer;
611 const XML_Memory_Handling_Suite m_mem;
612 /* first character to be parsed */
613 const char *m_bufferPtr;
614 /* past last character to be parsed */
615 char *m_bufferEnd;
616 /* allocated end of m_buffer */
617 const char *m_bufferLim;
618 XML_Index m_parseEndByteIndex;
619 const char *m_parseEndPtr;
620 XML_Char *m_dataBuf;
621 XML_Char *m_dataBufEnd;
622 XML_StartElementHandler m_startElementHandler;
623 XML_EndElementHandler m_endElementHandler;
624 XML_CharacterDataHandler m_characterDataHandler;
625 XML_ProcessingInstructionHandler m_processingInstructionHandler;
626 XML_CommentHandler m_commentHandler;
627 XML_StartCdataSectionHandler m_startCdataSectionHandler;
628 XML_EndCdataSectionHandler m_endCdataSectionHandler;
629 XML_DefaultHandler m_defaultHandler;
630 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
631 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
632 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
633 XML_NotationDeclHandler m_notationDeclHandler;
634 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
635 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
636 XML_NotStandaloneHandler m_notStandaloneHandler;
637 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
638 XML_Parser m_externalEntityRefHandlerArg;
639 XML_SkippedEntityHandler m_skippedEntityHandler;
640 XML_UnknownEncodingHandler m_unknownEncodingHandler;
641 XML_ElementDeclHandler m_elementDeclHandler;
642 XML_AttlistDeclHandler m_attlistDeclHandler;
643 XML_EntityDeclHandler m_entityDeclHandler;
644 XML_XmlDeclHandler m_xmlDeclHandler;
645 const ENCODING *m_encoding;
646 INIT_ENCODING m_initEncoding;
647 const ENCODING *m_internalEncoding;
648 const XML_Char *m_protocolEncodingName;
649 XML_Bool m_ns;
650 XML_Bool m_ns_triplets;
651 void *m_unknownEncodingMem;
652 void *m_unknownEncodingData;
653 void *m_unknownEncodingHandlerData;
654 void(XMLCALL *m_unknownEncodingRelease)(void *);
655 PROLOG_STATE m_prologState;
656 Processor *m_processor;
657 enum XML_Error m_errorCode;
658 const char *m_eventPtr;
659 const char *m_eventEndPtr;
660 const char *m_positionPtr;
661 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
662 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
663 XML_Bool m_defaultExpandInternalEntities;
664 int m_tagLevel;
665 ENTITY *m_declEntity;
666 const XML_Char *m_doctypeName;
667 const XML_Char *m_doctypeSysid;
668 const XML_Char *m_doctypePubid;
669 const XML_Char *m_declAttributeType;
670 const XML_Char *m_declNotationName;
671 const XML_Char *m_declNotationPublicId;
672 ELEMENT_TYPE *m_declElementType;
673 ATTRIBUTE_ID *m_declAttributeId;
674 XML_Bool m_declAttributeIsCdata;
675 XML_Bool m_declAttributeIsId;
676 DTD *m_dtd;
677 const XML_Char *m_curBase;
678 TAG *m_tagStack;
679 TAG *m_freeTagList;
680 BINDING *m_inheritedBindings;
681 BINDING *m_freeBindingList;
682 int m_attsSize;
683 int m_nSpecifiedAtts;
684 int m_idAttIndex;
685 ATTRIBUTE *m_atts;
686 NS_ATT *m_nsAtts;
687 unsigned long m_nsAttsVersion;
688 unsigned char m_nsAttsPower;
689 #ifdef XML_ATTR_INFO
690 XML_AttrInfo *m_attInfo;
691 #endif
692 POSITION m_position;
693 STRING_POOL m_tempPool;
694 STRING_POOL m_temp2Pool;
695 char *m_groupConnector;
696 unsigned int m_groupSize;
697 XML_Char m_namespaceSeparator;
698 XML_Parser m_parentParser;
699 XML_ParsingStatus m_parsingStatus;
700 #ifdef XML_DTD
701 XML_Bool m_isParamEntity;
702 XML_Bool m_useForeignDTD;
703 enum XML_ParamEntityParsing m_paramEntityParsing;
704 #endif
705 unsigned long m_hash_secret_salt;
706 #ifdef XML_DTD
707 ACCOUNTING m_accounting;
708 ENTITY_STATS m_entity_stats;
709 #endif
/* Allocation helpers that route through the parser's own memory handling
   suite (m_mem).  The parser argument is parenthesized so that any
   pointer-valued expression (e.g. "&parsers[i]") can be passed. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
716 XML_Parser XMLCALL
717 XML_ParserCreate(const XML_Char *encodingName) {
718 return XML_ParserCreate_MM(encodingName, NULL, NULL);
721 XML_Parser XMLCALL
722 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
723 XML_Char tmp[2] = {nsSep, 0};
724 return XML_ParserCreate_MM(encodingName, NULL, tmp);
/* Zero-terminated XML_Char string, spelled out with the ASCII_* constants
   so that it has the same content regardless of the XML_Char type.  Its
   text is given in the line comment below. */
727 // "xml=http://www.w3.org/XML/1998/namespace"
728 static const XML_Char implicitContext[]
729 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
730 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
731 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
732 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
733 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
734 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
735 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
736 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
737 '\0'};
739 /* To avoid warnings about unused functions: */
740 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
742 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
744 /* Obtain entropy on Linux 3.17+ */
745 static int
746 writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
747 int success = 0; /* full count bytes written? */
748 size_t bytesWrittenTotal = 0;
749 const unsigned int getrandomFlags = GRND_NONBLOCK;
751 do {
752 void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
753 const size_t bytesToWrite = count - bytesWrittenTotal;
755 const int bytesWrittenMore =
756 # if defined(HAVE_GETRANDOM)
757 getrandom(currentTarget, bytesToWrite, getrandomFlags);
758 # else
759 syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
760 # endif
762 if (bytesWrittenMore > 0) {
763 bytesWrittenTotal += bytesWrittenMore;
764 if (bytesWrittenTotal >= count)
765 success = 1;
767 } while (! success && (errno == EINTR));
769 return success;
772 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
774 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
/* Extract entropy from /dev/urandom.

   Writes count random bytes to target.  Returns 1 if all count bytes were
   written and 0 otherwise (the device could not be opened, read() hit end
   of file, or read() failed with anything but EINTR).  A request for
   0 bytes trivially succeeds once the device has been opened.

   Whether to continue is decided from the return value of read(): short
   reads are continued, and errno is consulted only after a failing call,
   because errno is not meaningful after a successful one. */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 1; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* end of file, or a failure other than an interrupted call */
      success = 0;
      break;
    }
  }

  close(fd);
  return success;
}
804 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
806 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
808 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
/* Fill target with count random bytes taken from successive arc4random()
   results.  Each 32-bit result supplies up to four bytes, least
   significant byte first; a final partial word is used only as far as
   needed. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    const uint32_t word = arc4random();
    size_t byteIdx;

    for (byteIdx = 0; byteIdx < sizeof(word) && written < count; byteIdx++) {
      out[written] = (uint8_t)(word >> (byteIdx * 8));
      written++;
    }
  }
}
826 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
828 #ifdef _WIN32
830 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
831 as it didn't declare it in its header prior to version 5.3.0 of its
832 runtime package (mingwrt, containing stdlib.h). The upstream fix
833 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
834 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
835 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
836 __declspec(dllimport) int rand_s(unsigned int *);
837 # endif
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills target with count bytes, consuming each rand_s() result least
 * significant byte first.  Returns 1 on success and 0 as soon as rand_s()
 * reports a failure.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t written = 0;

  while (written < count) {
    unsigned int word = 0;
    size_t byteIdx;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    for (byteIdx = 0; byteIdx < sizeof(word) && written < count; byteIdx++) {
      out[written] = (uint8_t)(word >> (byteIdx * 8));
      written++;
    }
  }

  return 1; /* success */
}
863 #endif /* _WIN32 */
865 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
/* Low quality, time based entropy used by the fallback path.
   On Windows: XOR of the two halves of the current FILETIME.
   Elsewhere: the microseconds part of gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
# ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
# else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  assert(rc == 0);
  (void)rc; /* rc is otherwise unused when NDEBUG removes the assert */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
# endif
}
890 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
/* Pass-through helper: returns entropy unchanged.  When the debug level
   obtained for "EXPAT_ENTROPY_DEBUG" is at least 1, it first prints the
   label, the value in zero-padded hex and its size in bytes to stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
901 static unsigned long
902 generate_hash_secret_salt(XML_Parser parser) {
903 unsigned long entropy;
904 (void)parser;
906 /* "Failproof" high quality providers: */
907 #if defined(HAVE_ARC4RANDOM_BUF)
908 arc4random_buf(&entropy, sizeof(entropy));
909 return ENTROPY_DEBUG("arc4random_buf", entropy);
910 #elif defined(HAVE_ARC4RANDOM)
911 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
912 return ENTROPY_DEBUG("arc4random", entropy);
913 #else
914 /* Try high quality providers first .. */
915 # ifdef _WIN32
916 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
917 return ENTROPY_DEBUG("rand_s", entropy);
919 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
920 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
921 return ENTROPY_DEBUG("getrandom", entropy);
923 # endif
924 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
925 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
926 return ENTROPY_DEBUG("/dev/urandom", entropy);
928 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
929 /* .. and self-made low quality for backup: */
931 /* Process ID is 0 bits entropy if attacker has local access */
932 entropy = gather_time_entropy() ^ getpid();
934 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
935 if (sizeof(unsigned long) == 4) {
936 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
937 } else {
938 return ENTROPY_DEBUG("fallback(8)",
939 entropy * (unsigned long)2305843009213693951ULL);
941 #endif
944 static unsigned long
945 get_hash_secret_salt(XML_Parser parser) {
946 if (parser->m_parentParser != NULL)
947 return get_hash_secret_salt(parser->m_parentParser);
948 return parser->m_hash_secret_salt;
951 static XML_Bool /* only valid for root parser */
952 startParsing(XML_Parser parser) {
953 /* hash functions must be initialized before setContext() is called */
954 if (parser->m_hash_secret_salt == 0)
955 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
956 if (parser->m_ns) {
957 /* implicit context only set for root parser, since child
958 parsers (i.e. external entity parsers) will inherit it
960 return setContext(parser, implicitContext);
962 return XML_TRUE;
965 XML_Parser XMLCALL
966 XML_ParserCreate_MM(const XML_Char *encodingName,
967 const XML_Memory_Handling_Suite *memsuite,
968 const XML_Char *nameSep) {
969 return parserCreate(encodingName, memsuite, nameSep, NULL);
972 static XML_Parser
973 parserCreate(const XML_Char *encodingName,
974 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
975 DTD *dtd) {
976 XML_Parser parser;
978 if (memsuite) {
979 XML_Memory_Handling_Suite *mtemp;
980 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
981 if (parser != NULL) {
982 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
983 mtemp->malloc_fcn = memsuite->malloc_fcn;
984 mtemp->realloc_fcn = memsuite->realloc_fcn;
985 mtemp->free_fcn = memsuite->free_fcn;
987 } else {
988 XML_Memory_Handling_Suite *mtemp;
989 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
990 if (parser != NULL) {
991 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
992 mtemp->malloc_fcn = malloc;
993 mtemp->realloc_fcn = realloc;
994 mtemp->free_fcn = free;
998 if (! parser)
999 return parser;
1001 parser->m_buffer = NULL;
1002 parser->m_bufferLim = NULL;
1004 parser->m_attsSize = INIT_ATTS_SIZE;
1005 parser->m_atts
1006 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1007 if (parser->m_atts == NULL) {
1008 FREE(parser, parser);
1009 return NULL;
1011 #ifdef XML_ATTR_INFO
1012 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1013 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1014 if (parser->m_attInfo == NULL) {
1015 FREE(parser, parser->m_atts);
1016 FREE(parser, parser);
1017 return NULL;
1019 #endif
1020 parser->m_dataBuf
1021 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1022 if (parser->m_dataBuf == NULL) {
1023 FREE(parser, parser->m_atts);
1024 #ifdef XML_ATTR_INFO
1025 FREE(parser, parser->m_attInfo);
1026 #endif
1027 FREE(parser, parser);
1028 return NULL;
1030 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1032 if (dtd)
1033 parser->m_dtd = dtd;
1034 else {
1035 parser->m_dtd = dtdCreate(&parser->m_mem);
1036 if (parser->m_dtd == NULL) {
1037 FREE(parser, parser->m_dataBuf);
1038 FREE(parser, parser->m_atts);
1039 #ifdef XML_ATTR_INFO
1040 FREE(parser, parser->m_attInfo);
1041 #endif
1042 FREE(parser, parser);
1043 return NULL;
1047 parser->m_freeBindingList = NULL;
1048 parser->m_freeTagList = NULL;
1049 parser->m_freeInternalEntities = NULL;
1051 parser->m_groupSize = 0;
1052 parser->m_groupConnector = NULL;
1054 parser->m_unknownEncodingHandler = NULL;
1055 parser->m_unknownEncodingHandlerData = NULL;
1057 parser->m_namespaceSeparator = ASCII_EXCL;
1058 parser->m_ns = XML_FALSE;
1059 parser->m_ns_triplets = XML_FALSE;
1061 parser->m_nsAtts = NULL;
1062 parser->m_nsAttsVersion = 0;
1063 parser->m_nsAttsPower = 0;
1065 parser->m_protocolEncodingName = NULL;
1067 poolInit(&parser->m_tempPool, &(parser->m_mem));
1068 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1069 parserInit(parser, encodingName);
1071 if (encodingName && ! parser->m_protocolEncodingName) {
1072 if (dtd) {
1073 // We need to stop the upcoming call to XML_ParserFree from happily
1074 // destroying parser->m_dtd because the DTD is shared with the parent
1075 // parser and the only guard that keeps XML_ParserFree from destroying
1076 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1077 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1078 parser->m_dtd = NULL;
1080 XML_ParserFree(parser);
1081 return NULL;
1084 if (nameSep) {
1085 parser->m_ns = XML_TRUE;
1086 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1087 parser->m_namespaceSeparator = *nameSep;
1088 } else {
1089 parser->m_internalEncoding = XmlGetInternalEncoding();
1092 return parser;
1095 static void
1096 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1097 parser->m_processor = prologInitProcessor;
1098 XmlPrologStateInit(&parser->m_prologState);
1099 if (encodingName != NULL) {
1100 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1102 parser->m_curBase = NULL;
1103 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1104 parser->m_userData = NULL;
1105 parser->m_handlerArg = NULL;
1106 parser->m_startElementHandler = NULL;
1107 parser->m_endElementHandler = NULL;
1108 parser->m_characterDataHandler = NULL;
1109 parser->m_processingInstructionHandler = NULL;
1110 parser->m_commentHandler = NULL;
1111 parser->m_startCdataSectionHandler = NULL;
1112 parser->m_endCdataSectionHandler = NULL;
1113 parser->m_defaultHandler = NULL;
1114 parser->m_startDoctypeDeclHandler = NULL;
1115 parser->m_endDoctypeDeclHandler = NULL;
1116 parser->m_unparsedEntityDeclHandler = NULL;
1117 parser->m_notationDeclHandler = NULL;
1118 parser->m_startNamespaceDeclHandler = NULL;
1119 parser->m_endNamespaceDeclHandler = NULL;
1120 parser->m_notStandaloneHandler = NULL;
1121 parser->m_externalEntityRefHandler = NULL;
1122 parser->m_externalEntityRefHandlerArg = parser;
1123 parser->m_skippedEntityHandler = NULL;
1124 parser->m_elementDeclHandler = NULL;
1125 parser->m_attlistDeclHandler = NULL;
1126 parser->m_entityDeclHandler = NULL;
1127 parser->m_xmlDeclHandler = NULL;
1128 parser->m_bufferPtr = parser->m_buffer;
1129 parser->m_bufferEnd = parser->m_buffer;
1130 parser->m_parseEndByteIndex = 0;
1131 parser->m_parseEndPtr = NULL;
1132 parser->m_declElementType = NULL;
1133 parser->m_declAttributeId = NULL;
1134 parser->m_declEntity = NULL;
1135 parser->m_doctypeName = NULL;
1136 parser->m_doctypeSysid = NULL;
1137 parser->m_doctypePubid = NULL;
1138 parser->m_declAttributeType = NULL;
1139 parser->m_declNotationName = NULL;
1140 parser->m_declNotationPublicId = NULL;
1141 parser->m_declAttributeIsCdata = XML_FALSE;
1142 parser->m_declAttributeIsId = XML_FALSE;
1143 memset(&parser->m_position, 0, sizeof(POSITION));
1144 parser->m_errorCode = XML_ERROR_NONE;
1145 parser->m_eventPtr = NULL;
1146 parser->m_eventEndPtr = NULL;
1147 parser->m_positionPtr = NULL;
1148 parser->m_openInternalEntities = NULL;
1149 parser->m_defaultExpandInternalEntities = XML_TRUE;
1150 parser->m_tagLevel = 0;
1151 parser->m_tagStack = NULL;
1152 parser->m_inheritedBindings = NULL;
1153 parser->m_nSpecifiedAtts = 0;
1154 parser->m_unknownEncodingMem = NULL;
1155 parser->m_unknownEncodingRelease = NULL;
1156 parser->m_unknownEncodingData = NULL;
1157 parser->m_parentParser = NULL;
1158 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1159 #ifdef XML_DTD
1160 parser->m_isParamEntity = XML_FALSE;
1161 parser->m_useForeignDTD = XML_FALSE;
1162 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1163 #endif
1164 parser->m_hash_secret_salt = 0;
1166 #ifdef XML_DTD
1167 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1168 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1169 parser->m_accounting.maximumAmplificationFactor
1170 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1171 parser->m_accounting.activationThresholdBytes
1172 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1174 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1175 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1176 #endif
1179 /* moves list of bindings to m_freeBindingList */
1180 static void FASTCALL
1181 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1182 while (bindings) {
1183 BINDING *b = bindings;
1184 bindings = bindings->nextTagBinding;
1185 b->nextTagBinding = parser->m_freeBindingList;
1186 parser->m_freeBindingList = b;
1190 XML_Bool XMLCALL
1191 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1192 TAG *tStk;
1193 OPEN_INTERNAL_ENTITY *openEntityList;
1195 if (parser == NULL)
1196 return XML_FALSE;
1198 if (parser->m_parentParser)
1199 return XML_FALSE;
1200 /* move m_tagStack to m_freeTagList */
1201 tStk = parser->m_tagStack;
1202 while (tStk) {
1203 TAG *tag = tStk;
1204 tStk = tStk->parent;
1205 tag->parent = parser->m_freeTagList;
1206 moveToFreeBindingList(parser, tag->bindings);
1207 tag->bindings = NULL;
1208 parser->m_freeTagList = tag;
1210 /* move m_openInternalEntities to m_freeInternalEntities */
1211 openEntityList = parser->m_openInternalEntities;
1212 while (openEntityList) {
1213 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1214 openEntityList = openEntity->next;
1215 openEntity->next = parser->m_freeInternalEntities;
1216 parser->m_freeInternalEntities = openEntity;
1218 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1219 FREE(parser, parser->m_unknownEncodingMem);
1220 if (parser->m_unknownEncodingRelease)
1221 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1222 poolClear(&parser->m_tempPool);
1223 poolClear(&parser->m_temp2Pool);
1224 FREE(parser, (void *)parser->m_protocolEncodingName);
1225 parser->m_protocolEncodingName = NULL;
1226 parserInit(parser, encodingName);
1227 dtdReset(parser->m_dtd, &parser->m_mem);
1228 return XML_TRUE;
1231 enum XML_Status XMLCALL
1232 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1233 if (parser == NULL)
1234 return XML_STATUS_ERROR;
1235 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1236 XXX There's no way for the caller to determine which of the
1237 XXX possible error cases caused the XML_STATUS_ERROR return.
1239 if (parser->m_parsingStatus.parsing == XML_PARSING
1240 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1241 return XML_STATUS_ERROR;
1243 /* Get rid of any previous encoding name */
1244 FREE(parser, (void *)parser->m_protocolEncodingName);
1246 if (encodingName == NULL)
1247 /* No new encoding name */
1248 parser->m_protocolEncodingName = NULL;
1249 else {
1250 /* Copy the new encoding name into allocated memory */
1251 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1252 if (! parser->m_protocolEncodingName)
1253 return XML_STATUS_ERROR;
1255 return XML_STATUS_OK;
1258 XML_Parser XMLCALL
1259 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1260 const XML_Char *encodingName) {
1261 XML_Parser parser = oldParser;
1262 DTD *newDtd = NULL;
1263 DTD *oldDtd;
1264 XML_StartElementHandler oldStartElementHandler;
1265 XML_EndElementHandler oldEndElementHandler;
1266 XML_CharacterDataHandler oldCharacterDataHandler;
1267 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1268 XML_CommentHandler oldCommentHandler;
1269 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1270 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1271 XML_DefaultHandler oldDefaultHandler;
1272 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1273 XML_NotationDeclHandler oldNotationDeclHandler;
1274 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1275 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1276 XML_NotStandaloneHandler oldNotStandaloneHandler;
1277 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1278 XML_SkippedEntityHandler oldSkippedEntityHandler;
1279 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1280 XML_ElementDeclHandler oldElementDeclHandler;
1281 XML_AttlistDeclHandler oldAttlistDeclHandler;
1282 XML_EntityDeclHandler oldEntityDeclHandler;
1283 XML_XmlDeclHandler oldXmlDeclHandler;
1284 ELEMENT_TYPE *oldDeclElementType;
1286 void *oldUserData;
1287 void *oldHandlerArg;
1288 XML_Bool oldDefaultExpandInternalEntities;
1289 XML_Parser oldExternalEntityRefHandlerArg;
1290 #ifdef XML_DTD
1291 enum XML_ParamEntityParsing oldParamEntityParsing;
1292 int oldInEntityValue;
1293 #endif
1294 XML_Bool oldns_triplets;
1295 /* Note that the new parser shares the same hash secret as the old
1296 parser, so that dtdCopy and copyEntityTable can lookup values
1297 from hash tables associated with either parser without us having
1298 to worry which hash secrets each table has.
1300 unsigned long oldhash_secret_salt;
1302 /* Validate the oldParser parameter before we pull everything out of it */
1303 if (oldParser == NULL)
1304 return NULL;
1306 /* Stash the original parser contents on the stack */
1307 oldDtd = parser->m_dtd;
1308 oldStartElementHandler = parser->m_startElementHandler;
1309 oldEndElementHandler = parser->m_endElementHandler;
1310 oldCharacterDataHandler = parser->m_characterDataHandler;
1311 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1312 oldCommentHandler = parser->m_commentHandler;
1313 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1314 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1315 oldDefaultHandler = parser->m_defaultHandler;
1316 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1317 oldNotationDeclHandler = parser->m_notationDeclHandler;
1318 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1319 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1320 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1321 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1322 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1323 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1324 oldElementDeclHandler = parser->m_elementDeclHandler;
1325 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1326 oldEntityDeclHandler = parser->m_entityDeclHandler;
1327 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1328 oldDeclElementType = parser->m_declElementType;
1330 oldUserData = parser->m_userData;
1331 oldHandlerArg = parser->m_handlerArg;
1332 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1333 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1334 #ifdef XML_DTD
1335 oldParamEntityParsing = parser->m_paramEntityParsing;
1336 oldInEntityValue = parser->m_prologState.inEntityValue;
1337 #endif
1338 oldns_triplets = parser->m_ns_triplets;
1339 /* Note that the new parser shares the same hash secret as the old
1340 parser, so that dtdCopy and copyEntityTable can lookup values
1341 from hash tables associated with either parser without us having
1342 to worry which hash secrets each table has.
1344 oldhash_secret_salt = parser->m_hash_secret_salt;
1346 #ifdef XML_DTD
1347 if (! context)
1348 newDtd = oldDtd;
1349 #endif /* XML_DTD */
1351 /* Note that the magical uses of the pre-processor to make field
1352 access look more like C++ require that `parser' be overwritten
1353 here. This makes this function more painful to follow than it
1354 would be otherwise.
1356 if (parser->m_ns) {
1357 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1358 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1359 } else {
1360 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1363 if (! parser)
1364 return NULL;
1366 parser->m_startElementHandler = oldStartElementHandler;
1367 parser->m_endElementHandler = oldEndElementHandler;
1368 parser->m_characterDataHandler = oldCharacterDataHandler;
1369 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1370 parser->m_commentHandler = oldCommentHandler;
1371 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1372 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1373 parser->m_defaultHandler = oldDefaultHandler;
1374 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1375 parser->m_notationDeclHandler = oldNotationDeclHandler;
1376 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1377 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1378 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1379 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1380 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1381 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1382 parser->m_elementDeclHandler = oldElementDeclHandler;
1383 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1384 parser->m_entityDeclHandler = oldEntityDeclHandler;
1385 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1386 parser->m_declElementType = oldDeclElementType;
1387 parser->m_userData = oldUserData;
1388 if (oldUserData == oldHandlerArg)
1389 parser->m_handlerArg = parser->m_userData;
1390 else
1391 parser->m_handlerArg = parser;
1392 if (oldExternalEntityRefHandlerArg != oldParser)
1393 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1394 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1395 parser->m_ns_triplets = oldns_triplets;
1396 parser->m_hash_secret_salt = oldhash_secret_salt;
1397 parser->m_parentParser = oldParser;
1398 #ifdef XML_DTD
1399 parser->m_paramEntityParsing = oldParamEntityParsing;
1400 parser->m_prologState.inEntityValue = oldInEntityValue;
1401 if (context) {
1402 #endif /* XML_DTD */
1403 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1404 || ! setContext(parser, context)) {
1405 XML_ParserFree(parser);
1406 return NULL;
1408 parser->m_processor = externalEntityInitProcessor;
1409 #ifdef XML_DTD
1410 } else {
1411 /* The DTD instance referenced by parser->m_dtd is shared between the
1412 document's root parser and external PE parsers, therefore one does not
1413 need to call setContext. In addition, one also *must* not call
1414 setContext, because this would overwrite existing prefix->binding
1415 pointers in parser->m_dtd with ones that get destroyed with the external
1416 PE parser. This would leave those prefixes with dangling pointers.
1418 parser->m_isParamEntity = XML_TRUE;
1419 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1420 parser->m_processor = externalParEntInitProcessor;
1422 #endif /* XML_DTD */
1423 return parser;
1426 static void FASTCALL
1427 destroyBindings(BINDING *bindings, XML_Parser parser) {
1428 for (;;) {
1429 BINDING *b = bindings;
1430 if (! b)
1431 break;
1432 bindings = b->nextTagBinding;
1433 FREE(parser, b->uri);
1434 FREE(parser, b);
1438 void XMLCALL
1439 XML_ParserFree(XML_Parser parser) {
1440 TAG *tagList;
1441 OPEN_INTERNAL_ENTITY *entityList;
1442 if (parser == NULL)
1443 return;
1444 /* free m_tagStack and m_freeTagList */
1445 tagList = parser->m_tagStack;
1446 for (;;) {
1447 TAG *p;
1448 if (tagList == NULL) {
1449 if (parser->m_freeTagList == NULL)
1450 break;
1451 tagList = parser->m_freeTagList;
1452 parser->m_freeTagList = NULL;
1454 p = tagList;
1455 tagList = tagList->parent;
1456 FREE(parser, p->buf);
1457 destroyBindings(p->bindings, parser);
1458 FREE(parser, p);
1460 /* free m_openInternalEntities and m_freeInternalEntities */
1461 entityList = parser->m_openInternalEntities;
1462 for (;;) {
1463 OPEN_INTERNAL_ENTITY *openEntity;
1464 if (entityList == NULL) {
1465 if (parser->m_freeInternalEntities == NULL)
1466 break;
1467 entityList = parser->m_freeInternalEntities;
1468 parser->m_freeInternalEntities = NULL;
1470 openEntity = entityList;
1471 entityList = entityList->next;
1472 FREE(parser, openEntity);
1475 destroyBindings(parser->m_freeBindingList, parser);
1476 destroyBindings(parser->m_inheritedBindings, parser);
1477 poolDestroy(&parser->m_tempPool);
1478 poolDestroy(&parser->m_temp2Pool);
1479 FREE(parser, (void *)parser->m_protocolEncodingName);
1480 #ifdef XML_DTD
1481 /* external parameter entity parsers share the DTD structure
1482 parser->m_dtd with the root parser, so we must not destroy it
1484 if (! parser->m_isParamEntity && parser->m_dtd)
1485 #else
1486 if (parser->m_dtd)
1487 #endif /* XML_DTD */
1488 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1489 &parser->m_mem);
1490 FREE(parser, (void *)parser->m_atts);
1491 #ifdef XML_ATTR_INFO
1492 FREE(parser, (void *)parser->m_attInfo);
1493 #endif
1494 FREE(parser, parser->m_groupConnector);
1495 FREE(parser, parser->m_buffer);
1496 FREE(parser, parser->m_dataBuf);
1497 FREE(parser, parser->m_nsAtts);
1498 FREE(parser, parser->m_unknownEncodingMem);
1499 if (parser->m_unknownEncodingRelease)
1500 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1501 FREE(parser, parser);
1504 void XMLCALL
1505 XML_UseParserAsHandlerArg(XML_Parser parser) {
1506 if (parser != NULL)
1507 parser->m_handlerArg = parser;
1510 enum XML_Error XMLCALL
1511 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1512 if (parser == NULL)
1513 return XML_ERROR_INVALID_ARGUMENT;
1514 #ifdef XML_DTD
1515 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1516 if (parser->m_parsingStatus.parsing == XML_PARSING
1517 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1518 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1519 parser->m_useForeignDTD = useDTD;
1520 return XML_ERROR_NONE;
1521 #else
1522 UNUSED_P(useDTD);
1523 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1524 #endif
1527 void XMLCALL
1528 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1529 if (parser == NULL)
1530 return;
1531 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1532 if (parser->m_parsingStatus.parsing == XML_PARSING
1533 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1534 return;
1535 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1538 void XMLCALL
1539 XML_SetUserData(XML_Parser parser, void *p) {
1540 if (parser == NULL)
1541 return;
1542 if (parser->m_handlerArg == parser->m_userData)
1543 parser->m_handlerArg = parser->m_userData = p;
1544 else
1545 parser->m_userData = p;
1548 enum XML_Status XMLCALL
1549 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1550 if (parser == NULL)
1551 return XML_STATUS_ERROR;
1552 if (p) {
1553 p = poolCopyString(&parser->m_dtd->pool, p);
1554 if (! p)
1555 return XML_STATUS_ERROR;
1556 parser->m_curBase = p;
1557 } else
1558 parser->m_curBase = NULL;
1559 return XML_STATUS_OK;
1562 const XML_Char *XMLCALL
1563 XML_GetBase(XML_Parser parser) {
1564 if (parser == NULL)
1565 return NULL;
1566 return parser->m_curBase;
1569 int XMLCALL
1570 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1571 if (parser == NULL)
1572 return -1;
1573 return parser->m_nSpecifiedAtts;
1576 int XMLCALL
1577 XML_GetIdAttributeIndex(XML_Parser parser) {
1578 if (parser == NULL)
1579 return -1;
1580 return parser->m_idAttIndex;
#ifdef XML_ATTR_INFO
/* Return the parser's attribute info array, or NULL if `parser` is
 * NULL.  Only available when built with XML_ATTR_INFO.
 */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser == NULL)
    return NULL;
  return parser->m_attInfo;
}
#endif
1592 void XMLCALL
1593 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1594 XML_EndElementHandler end) {
1595 if (parser == NULL)
1596 return;
1597 parser->m_startElementHandler = start;
1598 parser->m_endElementHandler = end;
1601 void XMLCALL
1602 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1603 if (parser != NULL)
1604 parser->m_startElementHandler = start;
1607 void XMLCALL
1608 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1609 if (parser != NULL)
1610 parser->m_endElementHandler = end;
1613 void XMLCALL
1614 XML_SetCharacterDataHandler(XML_Parser parser,
1615 XML_CharacterDataHandler handler) {
1616 if (parser != NULL)
1617 parser->m_characterDataHandler = handler;
1620 void XMLCALL
1621 XML_SetProcessingInstructionHandler(XML_Parser parser,
1622 XML_ProcessingInstructionHandler handler) {
1623 if (parser != NULL)
1624 parser->m_processingInstructionHandler = handler;
1627 void XMLCALL
1628 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1629 if (parser != NULL)
1630 parser->m_commentHandler = handler;
1633 void XMLCALL
1634 XML_SetCdataSectionHandler(XML_Parser parser,
1635 XML_StartCdataSectionHandler start,
1636 XML_EndCdataSectionHandler end) {
1637 if (parser == NULL)
1638 return;
1639 parser->m_startCdataSectionHandler = start;
1640 parser->m_endCdataSectionHandler = end;
1643 void XMLCALL
1644 XML_SetStartCdataSectionHandler(XML_Parser parser,
1645 XML_StartCdataSectionHandler start) {
1646 if (parser != NULL)
1647 parser->m_startCdataSectionHandler = start;
1650 void XMLCALL
1651 XML_SetEndCdataSectionHandler(XML_Parser parser,
1652 XML_EndCdataSectionHandler end) {
1653 if (parser != NULL)
1654 parser->m_endCdataSectionHandler = end;
1657 void XMLCALL
1658 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1659 if (parser == NULL)
1660 return;
1661 parser->m_defaultHandler = handler;
1662 parser->m_defaultExpandInternalEntities = XML_FALSE;
1665 void XMLCALL
1666 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1667 if (parser == NULL)
1668 return;
1669 parser->m_defaultHandler = handler;
1670 parser->m_defaultExpandInternalEntities = XML_TRUE;
1673 void XMLCALL
1674 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1675 XML_EndDoctypeDeclHandler end) {
1676 if (parser == NULL)
1677 return;
1678 parser->m_startDoctypeDeclHandler = start;
1679 parser->m_endDoctypeDeclHandler = end;
1682 void XMLCALL
1683 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1684 XML_StartDoctypeDeclHandler start) {
1685 if (parser != NULL)
1686 parser->m_startDoctypeDeclHandler = start;
1689 void XMLCALL
1690 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1691 if (parser != NULL)
1692 parser->m_endDoctypeDeclHandler = end;
1695 void XMLCALL
1696 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1697 XML_UnparsedEntityDeclHandler handler) {
1698 if (parser != NULL)
1699 parser->m_unparsedEntityDeclHandler = handler;
1702 void XMLCALL
1703 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1704 if (parser != NULL)
1705 parser->m_notationDeclHandler = handler;
1708 void XMLCALL
1709 XML_SetNamespaceDeclHandler(XML_Parser parser,
1710 XML_StartNamespaceDeclHandler start,
1711 XML_EndNamespaceDeclHandler end) {
1712 if (parser == NULL)
1713 return;
1714 parser->m_startNamespaceDeclHandler = start;
1715 parser->m_endNamespaceDeclHandler = end;
1718 void XMLCALL
1719 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1720 XML_StartNamespaceDeclHandler start) {
1721 if (parser != NULL)
1722 parser->m_startNamespaceDeclHandler = start;
1725 void XMLCALL
1726 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1727 XML_EndNamespaceDeclHandler end) {
1728 if (parser != NULL)
1729 parser->m_endNamespaceDeclHandler = end;
1732 void XMLCALL
1733 XML_SetNotStandaloneHandler(XML_Parser parser,
1734 XML_NotStandaloneHandler handler) {
1735 if (parser != NULL)
1736 parser->m_notStandaloneHandler = handler;
1739 void XMLCALL
1740 XML_SetExternalEntityRefHandler(XML_Parser parser,
1741 XML_ExternalEntityRefHandler handler) {
1742 if (parser != NULL)
1743 parser->m_externalEntityRefHandler = handler;
1746 void XMLCALL
1747 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1748 if (parser == NULL)
1749 return;
1750 if (arg)
1751 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1752 else
1753 parser->m_externalEntityRefHandlerArg = parser;
1756 void XMLCALL
1757 XML_SetSkippedEntityHandler(XML_Parser parser,
1758 XML_SkippedEntityHandler handler) {
1759 if (parser != NULL)
1760 parser->m_skippedEntityHandler = handler;
1763 void XMLCALL
1764 XML_SetUnknownEncodingHandler(XML_Parser parser,
1765 XML_UnknownEncodingHandler handler, void *data) {
1766 if (parser == NULL)
1767 return;
1768 parser->m_unknownEncodingHandler = handler;
1769 parser->m_unknownEncodingHandlerData = data;
1772 void XMLCALL
1773 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1774 if (parser != NULL)
1775 parser->m_elementDeclHandler = eldecl;
1778 void XMLCALL
1779 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1780 if (parser != NULL)
1781 parser->m_attlistDeclHandler = attdecl;
1784 void XMLCALL
1785 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1786 if (parser != NULL)
1787 parser->m_entityDeclHandler = handler;
1790 void XMLCALL
1791 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1792 if (parser != NULL)
1793 parser->m_xmlDeclHandler = handler;
1796 int XMLCALL
1797 XML_SetParamEntityParsing(XML_Parser parser,
1798 enum XML_ParamEntityParsing peParsing) {
1799 if (parser == NULL)
1800 return 0;
1801 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1802 if (parser->m_parsingStatus.parsing == XML_PARSING
1803 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1804 return 0;
1805 #ifdef XML_DTD
1806 parser->m_paramEntityParsing = peParsing;
1807 return 1;
1808 #else
1809 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1810 #endif
1813 int XMLCALL
1814 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1815 if (parser == NULL)
1816 return 0;
1817 if (parser->m_parentParser)
1818 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1819 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1820 if (parser->m_parsingStatus.parsing == XML_PARSING
1821 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1822 return 0;
1823 parser->m_hash_secret_salt = hash_salt;
1824 return 1;
1827 enum XML_Status XMLCALL
1828 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1829 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1830 if (parser != NULL)
1831 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1832 return XML_STATUS_ERROR;
1834 switch (parser->m_parsingStatus.parsing) {
1835 case XML_SUSPENDED:
1836 parser->m_errorCode = XML_ERROR_SUSPENDED;
1837 return XML_STATUS_ERROR;
1838 case XML_FINISHED:
1839 parser->m_errorCode = XML_ERROR_FINISHED;
1840 return XML_STATUS_ERROR;
1841 case XML_INITIALIZED:
1842 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1843 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1844 return XML_STATUS_ERROR;
1846 /* fall through */
1847 default:
1848 parser->m_parsingStatus.parsing = XML_PARSING;
1851 if (len == 0) {
1852 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1853 if (! isFinal)
1854 return XML_STATUS_OK;
1855 parser->m_positionPtr = parser->m_bufferPtr;
1856 parser->m_parseEndPtr = parser->m_bufferEnd;
1858 /* If data are left over from last buffer, and we now know that these
1859 data are the final chunk of input, then we have to check them again
1860 to detect errors based on that fact.
1862 parser->m_errorCode
1863 = parser->m_processor(parser, parser->m_bufferPtr,
1864 parser->m_parseEndPtr, &parser->m_bufferPtr);
1866 if (parser->m_errorCode == XML_ERROR_NONE) {
1867 switch (parser->m_parsingStatus.parsing) {
1868 case XML_SUSPENDED:
1869 /* It is hard to be certain, but it seems that this case
1870 * cannot occur. This code is cleaning up a previous parse
1871 * with no new data (since len == 0). Changing the parsing
1872 * state requires getting to execute a handler function, and
1873 * there doesn't seem to be an opportunity for that while in
1874 * this circumstance.
1876 * Given the uncertainty, we retain the code but exclude it
1877 * from coverage tests.
1879 * LCOV_EXCL_START
1881 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1882 parser->m_bufferPtr, &parser->m_position);
1883 parser->m_positionPtr = parser->m_bufferPtr;
1884 return XML_STATUS_SUSPENDED;
1885 /* LCOV_EXCL_STOP */
1886 case XML_INITIALIZED:
1887 case XML_PARSING:
1888 parser->m_parsingStatus.parsing = XML_FINISHED;
1889 /* fall through */
1890 default:
1891 return XML_STATUS_OK;
1894 parser->m_eventEndPtr = parser->m_eventPtr;
1895 parser->m_processor = errorProcessor;
1896 return XML_STATUS_ERROR;
1898 #ifndef XML_CONTEXT_BYTES
1899 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1900 const char *end;
1901 int nLeftOver;
1902 enum XML_Status result;
1903 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1904 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1905 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1906 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1907 parser->m_processor = errorProcessor;
1908 return XML_STATUS_ERROR;
1910 parser->m_parseEndByteIndex += len;
1911 parser->m_positionPtr = s;
1912 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1914 parser->m_errorCode
1915 = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1917 if (parser->m_errorCode != XML_ERROR_NONE) {
1918 parser->m_eventEndPtr = parser->m_eventPtr;
1919 parser->m_processor = errorProcessor;
1920 return XML_STATUS_ERROR;
1921 } else {
1922 switch (parser->m_parsingStatus.parsing) {
1923 case XML_SUSPENDED:
1924 result = XML_STATUS_SUSPENDED;
1925 break;
1926 case XML_INITIALIZED:
1927 case XML_PARSING:
1928 if (isFinal) {
1929 parser->m_parsingStatus.parsing = XML_FINISHED;
1930 return XML_STATUS_OK;
1932 /* fall through */
1933 default:
1934 result = XML_STATUS_OK;
1938 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1939 &parser->m_position);
1940 nLeftOver = s + len - end;
1941 if (nLeftOver) {
1942 if (parser->m_buffer == NULL
1943 || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1944 /* avoid _signed_ integer overflow */
1945 char *temp = NULL;
1946 const int bytesToAllocate = (int)((unsigned)len * 2U);
1947 if (bytesToAllocate > 0) {
1948 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1950 if (temp == NULL) {
1951 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1952 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1953 parser->m_processor = errorProcessor;
1954 return XML_STATUS_ERROR;
1956 parser->m_buffer = temp;
1957 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1959 memcpy(parser->m_buffer, end, nLeftOver);
1961 parser->m_bufferPtr = parser->m_buffer;
1962 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1963 parser->m_positionPtr = parser->m_bufferPtr;
1964 parser->m_parseEndPtr = parser->m_bufferEnd;
1965 parser->m_eventPtr = parser->m_bufferPtr;
1966 parser->m_eventEndPtr = parser->m_bufferPtr;
1967 return result;
1969 #endif /* not defined XML_CONTEXT_BYTES */
1970 else {
1971 void *buff = XML_GetBuffer(parser, len);
1972 if (buff == NULL)
1973 return XML_STATUS_ERROR;
1974 else {
1975 memcpy(buff, s, len);
1976 return XML_ParseBuffer(parser, len, isFinal);
1981 enum XML_Status XMLCALL
1982 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
1983 const char *start;
1984 enum XML_Status result = XML_STATUS_OK;
1986 if (parser == NULL)
1987 return XML_STATUS_ERROR;
1988 switch (parser->m_parsingStatus.parsing) {
1989 case XML_SUSPENDED:
1990 parser->m_errorCode = XML_ERROR_SUSPENDED;
1991 return XML_STATUS_ERROR;
1992 case XML_FINISHED:
1993 parser->m_errorCode = XML_ERROR_FINISHED;
1994 return XML_STATUS_ERROR;
1995 case XML_INITIALIZED:
1996 /* Has someone called XML_GetBuffer successfully before? */
1997 if (! parser->m_bufferPtr) {
1998 parser->m_errorCode = XML_ERROR_NO_BUFFER;
1999 return XML_STATUS_ERROR;
2002 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2003 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2004 return XML_STATUS_ERROR;
2006 /* fall through */
2007 default:
2008 parser->m_parsingStatus.parsing = XML_PARSING;
2011 start = parser->m_bufferPtr;
2012 parser->m_positionPtr = start;
2013 parser->m_bufferEnd += len;
2014 parser->m_parseEndPtr = parser->m_bufferEnd;
2015 parser->m_parseEndByteIndex += len;
2016 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2018 parser->m_errorCode = parser->m_processor(
2019 parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
2021 if (parser->m_errorCode != XML_ERROR_NONE) {
2022 parser->m_eventEndPtr = parser->m_eventPtr;
2023 parser->m_processor = errorProcessor;
2024 return XML_STATUS_ERROR;
2025 } else {
2026 switch (parser->m_parsingStatus.parsing) {
2027 case XML_SUSPENDED:
2028 result = XML_STATUS_SUSPENDED;
2029 break;
2030 case XML_INITIALIZED:
2031 case XML_PARSING:
2032 if (isFinal) {
2033 parser->m_parsingStatus.parsing = XML_FINISHED;
2034 return result;
2036 default:; /* should not happen */
2040 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2041 parser->m_bufferPtr, &parser->m_position);
2042 parser->m_positionPtr = parser->m_bufferPtr;
2043 return result;
2046 void *XMLCALL
2047 XML_GetBuffer(XML_Parser parser, int len) {
2048 if (parser == NULL)
2049 return NULL;
2050 if (len < 0) {
2051 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2052 return NULL;
2054 switch (parser->m_parsingStatus.parsing) {
2055 case XML_SUSPENDED:
2056 parser->m_errorCode = XML_ERROR_SUSPENDED;
2057 return NULL;
2058 case XML_FINISHED:
2059 parser->m_errorCode = XML_ERROR_FINISHED;
2060 return NULL;
2061 default:;
2064 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
2065 #ifdef XML_CONTEXT_BYTES
2066 int keep;
2067 #endif /* defined XML_CONTEXT_BYTES */
2068 /* Do not invoke signed arithmetic overflow: */
2069 int neededSize = (int)((unsigned)len
2070 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2071 parser->m_bufferEnd, parser->m_bufferPtr));
2072 if (neededSize < 0) {
2073 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2074 return NULL;
2076 #ifdef XML_CONTEXT_BYTES
2077 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2078 if (keep > XML_CONTEXT_BYTES)
2079 keep = XML_CONTEXT_BYTES;
2080 /* Detect and prevent integer overflow */
2081 if (keep > INT_MAX - neededSize) {
2082 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2083 return NULL;
2085 neededSize += keep;
2086 #endif /* defined XML_CONTEXT_BYTES */
2087 if (neededSize
2088 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2089 #ifdef XML_CONTEXT_BYTES
2090 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2091 int offset
2092 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2093 - keep;
2094 /* The buffer pointers cannot be NULL here; we have at least some bytes
2095 * in the buffer */
2096 memmove(parser->m_buffer, &parser->m_buffer[offset],
2097 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2098 parser->m_bufferEnd -= offset;
2099 parser->m_bufferPtr -= offset;
2101 #else
2102 if (parser->m_buffer && parser->m_bufferPtr) {
2103 memmove(parser->m_buffer, parser->m_bufferPtr,
2104 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2105 parser->m_bufferEnd
2106 = parser->m_buffer
2107 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2108 parser->m_bufferPtr = parser->m_buffer;
2110 #endif /* not defined XML_CONTEXT_BYTES */
2111 } else {
2112 char *newBuf;
2113 int bufferSize
2114 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
2115 if (bufferSize == 0)
2116 bufferSize = INIT_BUFFER_SIZE;
2117 do {
2118 /* Do not invoke signed arithmetic overflow: */
2119 bufferSize = (int)(2U * (unsigned)bufferSize);
2120 } while (bufferSize < neededSize && bufferSize > 0);
2121 if (bufferSize <= 0) {
2122 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2123 return NULL;
2125 newBuf = (char *)MALLOC(parser, bufferSize);
2126 if (newBuf == 0) {
2127 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2128 return NULL;
2130 parser->m_bufferLim = newBuf + bufferSize;
2131 #ifdef XML_CONTEXT_BYTES
2132 if (parser->m_bufferPtr) {
2133 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2134 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2135 + keep);
2136 FREE(parser, parser->m_buffer);
2137 parser->m_buffer = newBuf;
2138 parser->m_bufferEnd
2139 = parser->m_buffer
2140 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2141 + keep;
2142 parser->m_bufferPtr = parser->m_buffer + keep;
2143 } else {
2144 /* This must be a brand new buffer with no data in it yet */
2145 parser->m_bufferEnd = newBuf;
2146 parser->m_bufferPtr = parser->m_buffer = newBuf;
2148 #else
2149 if (parser->m_bufferPtr) {
2150 memcpy(newBuf, parser->m_bufferPtr,
2151 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2152 FREE(parser, parser->m_buffer);
2153 parser->m_bufferEnd
2154 = newBuf
2155 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2156 } else {
2157 /* This must be a brand new buffer with no data in it yet */
2158 parser->m_bufferEnd = newBuf;
2160 parser->m_bufferPtr = parser->m_buffer = newBuf;
2161 #endif /* not defined XML_CONTEXT_BYTES */
2163 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2164 parser->m_positionPtr = NULL;
2166 return parser->m_bufferEnd;
2169 enum XML_Status XMLCALL
2170 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2171 if (parser == NULL)
2172 return XML_STATUS_ERROR;
2173 switch (parser->m_parsingStatus.parsing) {
2174 case XML_SUSPENDED:
2175 if (resumable) {
2176 parser->m_errorCode = XML_ERROR_SUSPENDED;
2177 return XML_STATUS_ERROR;
2179 parser->m_parsingStatus.parsing = XML_FINISHED;
2180 break;
2181 case XML_FINISHED:
2182 parser->m_errorCode = XML_ERROR_FINISHED;
2183 return XML_STATUS_ERROR;
2184 default:
2185 if (resumable) {
2186 #ifdef XML_DTD
2187 if (parser->m_isParamEntity) {
2188 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2189 return XML_STATUS_ERROR;
2191 #endif
2192 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2193 } else
2194 parser->m_parsingStatus.parsing = XML_FINISHED;
2196 return XML_STATUS_OK;
2199 enum XML_Status XMLCALL
2200 XML_ResumeParser(XML_Parser parser) {
2201 enum XML_Status result = XML_STATUS_OK;
2203 if (parser == NULL)
2204 return XML_STATUS_ERROR;
2205 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2206 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2207 return XML_STATUS_ERROR;
2209 parser->m_parsingStatus.parsing = XML_PARSING;
2211 parser->m_errorCode = parser->m_processor(
2212 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2214 if (parser->m_errorCode != XML_ERROR_NONE) {
2215 parser->m_eventEndPtr = parser->m_eventPtr;
2216 parser->m_processor = errorProcessor;
2217 return XML_STATUS_ERROR;
2218 } else {
2219 switch (parser->m_parsingStatus.parsing) {
2220 case XML_SUSPENDED:
2221 result = XML_STATUS_SUSPENDED;
2222 break;
2223 case XML_INITIALIZED:
2224 case XML_PARSING:
2225 if (parser->m_parsingStatus.finalBuffer) {
2226 parser->m_parsingStatus.parsing = XML_FINISHED;
2227 return result;
2229 default:;
2233 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2234 parser->m_bufferPtr, &parser->m_position);
2235 parser->m_positionPtr = parser->m_bufferPtr;
2236 return result;
2239 void XMLCALL
2240 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2241 if (parser == NULL)
2242 return;
2243 assert(status != NULL);
2244 *status = parser->m_parsingStatus;
2247 enum XML_Error XMLCALL
2248 XML_GetErrorCode(XML_Parser parser) {
2249 if (parser == NULL)
2250 return XML_ERROR_INVALID_ARGUMENT;
2251 return parser->m_errorCode;
2254 XML_Index XMLCALL
2255 XML_GetCurrentByteIndex(XML_Parser parser) {
2256 if (parser == NULL)
2257 return -1;
2258 if (parser->m_eventPtr)
2259 return (XML_Index)(parser->m_parseEndByteIndex
2260 - (parser->m_parseEndPtr - parser->m_eventPtr));
2261 return -1;
2264 int XMLCALL
2265 XML_GetCurrentByteCount(XML_Parser parser) {
2266 if (parser == NULL)
2267 return 0;
2268 if (parser->m_eventEndPtr && parser->m_eventPtr)
2269 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2270 return 0;
2273 const char *XMLCALL
2274 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2275 #ifdef XML_CONTEXT_BYTES
2276 if (parser == NULL)
2277 return NULL;
2278 if (parser->m_eventPtr && parser->m_buffer) {
2279 if (offset != NULL)
2280 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2281 if (size != NULL)
2282 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2283 return parser->m_buffer;
2285 #else
2286 (void)parser;
2287 (void)offset;
2288 (void)size;
2289 #endif /* defined XML_CONTEXT_BYTES */
2290 return (const char *)0;
2293 XML_Size XMLCALL
2294 XML_GetCurrentLineNumber(XML_Parser parser) {
2295 if (parser == NULL)
2296 return 0;
2297 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2298 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2299 parser->m_eventPtr, &parser->m_position);
2300 parser->m_positionPtr = parser->m_eventPtr;
2302 return parser->m_position.lineNumber + 1;
2305 XML_Size XMLCALL
2306 XML_GetCurrentColumnNumber(XML_Parser parser) {
2307 if (parser == NULL)
2308 return 0;
2309 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2310 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2311 parser->m_eventPtr, &parser->m_position);
2312 parser->m_positionPtr = parser->m_eventPtr;
2314 return parser->m_position.columnNumber;
2317 void XMLCALL
2318 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2319 if (parser != NULL)
2320 FREE(parser, model);
2323 void *XMLCALL
2324 XML_MemMalloc(XML_Parser parser, size_t size) {
2325 if (parser == NULL)
2326 return NULL;
2327 return MALLOC(parser, size);
2330 void *XMLCALL
2331 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2332 if (parser == NULL)
2333 return NULL;
2334 return REALLOC(parser, ptr, size);
2337 void XMLCALL
2338 XML_MemFree(XML_Parser parser, void *ptr) {
2339 if (parser != NULL)
2340 FREE(parser, ptr);
2343 void XMLCALL
2344 XML_DefaultCurrent(XML_Parser parser) {
2345 if (parser == NULL)
2346 return;
2347 if (parser->m_defaultHandler) {
2348 if (parser->m_openInternalEntities)
2349 reportDefault(parser, parser->m_internalEncoding,
2350 parser->m_openInternalEntities->internalEventPtr,
2351 parser->m_openInternalEntities->internalEventEndPtr);
2352 else
2353 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2354 parser->m_eventEndPtr);
2358 const XML_LChar *XMLCALL
2359 XML_ErrorString(enum XML_Error code) {
2360 switch (code) {
2361 case XML_ERROR_NONE:
2362 return NULL;
2363 case XML_ERROR_NO_MEMORY:
2364 return XML_L("out of memory");
2365 case XML_ERROR_SYNTAX:
2366 return XML_L("syntax error");
2367 case XML_ERROR_NO_ELEMENTS:
2368 return XML_L("no element found");
2369 case XML_ERROR_INVALID_TOKEN:
2370 return XML_L("not well-formed (invalid token)");
2371 case XML_ERROR_UNCLOSED_TOKEN:
2372 return XML_L("unclosed token");
2373 case XML_ERROR_PARTIAL_CHAR:
2374 return XML_L("partial character");
2375 case XML_ERROR_TAG_MISMATCH:
2376 return XML_L("mismatched tag");
2377 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2378 return XML_L("duplicate attribute");
2379 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2380 return XML_L("junk after document element");
2381 case XML_ERROR_PARAM_ENTITY_REF:
2382 return XML_L("illegal parameter entity reference");
2383 case XML_ERROR_UNDEFINED_ENTITY:
2384 return XML_L("undefined entity");
2385 case XML_ERROR_RECURSIVE_ENTITY_REF:
2386 return XML_L("recursive entity reference");
2387 case XML_ERROR_ASYNC_ENTITY:
2388 return XML_L("asynchronous entity");
2389 case XML_ERROR_BAD_CHAR_REF:
2390 return XML_L("reference to invalid character number");
2391 case XML_ERROR_BINARY_ENTITY_REF:
2392 return XML_L("reference to binary entity");
2393 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2394 return XML_L("reference to external entity in attribute");
2395 case XML_ERROR_MISPLACED_XML_PI:
2396 return XML_L("XML or text declaration not at start of entity");
2397 case XML_ERROR_UNKNOWN_ENCODING:
2398 return XML_L("unknown encoding");
2399 case XML_ERROR_INCORRECT_ENCODING:
2400 return XML_L("encoding specified in XML declaration is incorrect");
2401 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2402 return XML_L("unclosed CDATA section");
2403 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2404 return XML_L("error in processing external entity reference");
2405 case XML_ERROR_NOT_STANDALONE:
2406 return XML_L("document is not standalone");
2407 case XML_ERROR_UNEXPECTED_STATE:
2408 return XML_L("unexpected parser state - please send a bug report");
2409 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2410 return XML_L("entity declared in parameter entity");
2411 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2412 return XML_L("requested feature requires XML_DTD support in Expat");
2413 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2414 return XML_L("cannot change setting once parsing has begun");
2415 /* Added in 1.95.7. */
2416 case XML_ERROR_UNBOUND_PREFIX:
2417 return XML_L("unbound prefix");
2418 /* Added in 1.95.8. */
2419 case XML_ERROR_UNDECLARING_PREFIX:
2420 return XML_L("must not undeclare prefix");
2421 case XML_ERROR_INCOMPLETE_PE:
2422 return XML_L("incomplete markup in parameter entity");
2423 case XML_ERROR_XML_DECL:
2424 return XML_L("XML declaration not well-formed");
2425 case XML_ERROR_TEXT_DECL:
2426 return XML_L("text declaration not well-formed");
2427 case XML_ERROR_PUBLICID:
2428 return XML_L("illegal character(s) in public id");
2429 case XML_ERROR_SUSPENDED:
2430 return XML_L("parser suspended");
2431 case XML_ERROR_NOT_SUSPENDED:
2432 return XML_L("parser not suspended");
2433 case XML_ERROR_ABORTED:
2434 return XML_L("parsing aborted");
2435 case XML_ERROR_FINISHED:
2436 return XML_L("parsing finished");
2437 case XML_ERROR_SUSPEND_PE:
2438 return XML_L("cannot suspend in external parameter entity");
2439 /* Added in 2.0.0. */
2440 case XML_ERROR_RESERVED_PREFIX_XML:
2441 return XML_L(
2442 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2443 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2444 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2445 case XML_ERROR_RESERVED_NAMESPACE_URI:
2446 return XML_L(
2447 "prefix must not be bound to one of the reserved namespace names");
2448 /* Added in 2.2.5. */
2449 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2450 return XML_L("invalid argument");
2451 /* Added in 2.3.0. */
2452 case XML_ERROR_NO_BUFFER:
2453 return XML_L(
2454 "a successful prior call to function XML_GetBuffer is required");
2455 /* Added in 2.4.0. */
2456 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2457 return XML_L(
2458 "limit on input amplification factor (from DTD and entities) breached");
2460 return NULL;
2463 const XML_LChar *XMLCALL
2464 XML_ExpatVersion(void) {
2465 /* V1 is used to string-ize the version number. However, it would
2466 string-ize the actual version macro *names* unless we get them
2467 substituted before being passed to V1. CPP is defined to expand
2468 a macro, then rescan for more expansions. Thus, we use V2 to expand
2469 the version macros, then CPP will expand the resulting V1() macro
2470 with the correct numerals. */
2471 /* ### I'm assuming cpp is portable in this respect... */
2473 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2474 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2476 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2478 #undef V1
2479 #undef V2
2482 XML_Expat_Version XMLCALL
2483 XML_ExpatVersionInfo(void) {
2484 XML_Expat_Version version;
2486 version.major = XML_MAJOR_VERSION;
2487 version.minor = XML_MINOR_VERSION;
2488 version.micro = XML_MICRO_VERSION;
2490 return version;
2493 const XML_Feature *XMLCALL
2494 XML_GetFeatureList(void) {
2495 static const XML_Feature features[] = {
2496 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2497 sizeof(XML_Char)},
2498 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2499 sizeof(XML_LChar)},
2500 #ifdef XML_UNICODE
2501 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2502 #endif
2503 #ifdef XML_UNICODE_WCHAR_T
2504 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2505 #endif
2506 #ifdef XML_DTD
2507 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2508 #endif
2509 #ifdef XML_CONTEXT_BYTES
2510 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2511 XML_CONTEXT_BYTES},
2512 #endif
2513 #ifdef XML_MIN_SIZE
2514 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2515 #endif
2516 #ifdef XML_NS
2517 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2518 #endif
2519 #ifdef XML_LARGE_SIZE
2520 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2521 #endif
2522 #ifdef XML_ATTR_INFO
2523 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2524 #endif
2525 #ifdef XML_DTD
2526 /* Added in Expat 2.4.0. */
2527 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2528 XML_L("XML_BLAP_MAX_AMP"),
2529 (long int)
2530 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2531 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2532 XML_L("XML_BLAP_ACT_THRES"),
2533 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2534 #endif
2535 {XML_FEATURE_END, NULL, 0}};
2537 return features;
#ifdef XML_DTD
/* Set the maximum amplification factor used by the billion-laughs
   protection.  Accepted only on a parser without a parent and only for a
   factor that is a number and at least 1.0; returns XML_FALSE otherwise. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
    XML_Parser parser, float maximumAmplificationFactor) {
  if (parser == NULL)
    return XML_FALSE;
  if (parser->m_parentParser != NULL)
    return XML_FALSE;
  if (isnan(maximumAmplificationFactor))
    return XML_FALSE;
  if (maximumAmplificationFactor < 1.0f)
    return XML_FALSE;

  parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
  return XML_TRUE;
}

/* Set the activation threshold, in bytes, used by the billion-laughs
   protection.  Accepted only on a parser without a parent; returns
   XML_FALSE otherwise. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(
    XML_Parser parser, unsigned long long activationThresholdBytes) {
  if (parser == NULL)
    return XML_FALSE;
  if (parser->m_parentParser != NULL)
    return XML_FALSE;

  parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
  return XML_TRUE;
}
#endif /* XML_DTD */
2564 /* Initially tag->rawName always points into the parse buffer;
2565 for those TAG instances opened while the current parse buffer was
2566 processed, and not yet closed, we need to store tag->rawName in a more
2567 permanent location, since the parse buffer is about to be discarded.
2569 static XML_Bool
2570 storeRawNames(XML_Parser parser) {
2571 TAG *tag = parser->m_tagStack;
2572 while (tag) {
2573 int bufSize;
2574 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2575 size_t rawNameLen;
2576 char *rawNameBuf = tag->buf + nameLen;
2577 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2578 at the first entry that has already been copied; everything
2579 below it in the stack is already been accounted for in a
2580 previous call to this function.
2582 if (tag->rawName == rawNameBuf)
2583 break;
2584 /* For re-use purposes we need to ensure that the
2585 size of tag->buf is a multiple of sizeof(XML_Char).
2587 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2588 /* Detect and prevent integer overflow. */
2589 if (rawNameLen > (size_t)INT_MAX - nameLen)
2590 return XML_FALSE;
2591 bufSize = nameLen + (int)rawNameLen;
2592 if (bufSize > tag->bufEnd - tag->buf) {
2593 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2594 if (temp == NULL)
2595 return XML_FALSE;
2596 /* if tag->name.str points to tag->buf (only when namespace
2597 processing is off) then we have to update it
2599 if (tag->name.str == (XML_Char *)tag->buf)
2600 tag->name.str = (XML_Char *)temp;
2601 /* if tag->name.localPart is set (when namespace processing is on)
2602 then update it as well, since it will always point into tag->buf
2604 if (tag->name.localPart)
2605 tag->name.localPart
2606 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2607 tag->buf = temp;
2608 tag->bufEnd = temp + bufSize;
2609 rawNameBuf = temp + nameLen;
2611 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2612 tag->rawName = rawNameBuf;
2613 tag = tag->parent;
2615 return XML_TRUE;
2618 static enum XML_Error PTRCALL
2619 contentProcessor(XML_Parser parser, const char *start, const char *end,
2620 const char **endPtr) {
2621 enum XML_Error result = doContent(
2622 parser, 0, parser->m_encoding, start, end, endPtr,
2623 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2624 if (result == XML_ERROR_NONE) {
2625 if (! storeRawNames(parser))
2626 return XML_ERROR_NO_MEMORY;
2628 return result;
2631 static enum XML_Error PTRCALL
2632 externalEntityInitProcessor(XML_Parser parser, const char *start,
2633 const char *end, const char **endPtr) {
2634 enum XML_Error result = initializeEncoding(parser);
2635 if (result != XML_ERROR_NONE)
2636 return result;
2637 parser->m_processor = externalEntityInitProcessor2;
2638 return externalEntityInitProcessor2(parser, start, end, endPtr);
2641 static enum XML_Error PTRCALL
2642 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2643 const char *end, const char **endPtr) {
2644 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2645 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2646 switch (tok) {
2647 case XML_TOK_BOM:
2648 #ifdef XML_DTD
2649 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2650 XML_ACCOUNT_DIRECT)) {
2651 accountingOnAbort(parser);
2652 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2654 #endif /* XML_DTD */
2656 /* If we are at the end of the buffer, this would cause the next stage,
2657 i.e. externalEntityInitProcessor3, to pass control directly to
2658 doContent (by detecting XML_TOK_NONE) without processing any xml text
2659 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2661 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2662 *endPtr = next;
2663 return XML_ERROR_NONE;
2665 start = next;
2666 break;
2667 case XML_TOK_PARTIAL:
2668 if (! parser->m_parsingStatus.finalBuffer) {
2669 *endPtr = start;
2670 return XML_ERROR_NONE;
2672 parser->m_eventPtr = start;
2673 return XML_ERROR_UNCLOSED_TOKEN;
2674 case XML_TOK_PARTIAL_CHAR:
2675 if (! parser->m_parsingStatus.finalBuffer) {
2676 *endPtr = start;
2677 return XML_ERROR_NONE;
2679 parser->m_eventPtr = start;
2680 return XML_ERROR_PARTIAL_CHAR;
2682 parser->m_processor = externalEntityInitProcessor3;
2683 return externalEntityInitProcessor3(parser, start, end, endPtr);
2686 static enum XML_Error PTRCALL
2687 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2688 const char *end, const char **endPtr) {
2689 int tok;
2690 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2691 parser->m_eventPtr = start;
2692 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2693 /* Note: These bytes are accounted later in:
2694 - processXmlDecl
2695 - externalEntityContentProcessor
2697 parser->m_eventEndPtr = next;
2699 switch (tok) {
2700 case XML_TOK_XML_DECL: {
2701 enum XML_Error result;
2702 result = processXmlDecl(parser, 1, start, next);
2703 if (result != XML_ERROR_NONE)
2704 return result;
2705 switch (parser->m_parsingStatus.parsing) {
2706 case XML_SUSPENDED:
2707 *endPtr = next;
2708 return XML_ERROR_NONE;
2709 case XML_FINISHED:
2710 return XML_ERROR_ABORTED;
2711 default:
2712 start = next;
2714 } break;
2715 case XML_TOK_PARTIAL:
2716 if (! parser->m_parsingStatus.finalBuffer) {
2717 *endPtr = start;
2718 return XML_ERROR_NONE;
2720 return XML_ERROR_UNCLOSED_TOKEN;
2721 case XML_TOK_PARTIAL_CHAR:
2722 if (! parser->m_parsingStatus.finalBuffer) {
2723 *endPtr = start;
2724 return XML_ERROR_NONE;
2726 return XML_ERROR_PARTIAL_CHAR;
2728 parser->m_processor = externalEntityContentProcessor;
2729 parser->m_tagLevel = 1;
2730 return externalEntityContentProcessor(parser, start, end, endPtr);
2733 static enum XML_Error PTRCALL
2734 externalEntityContentProcessor(XML_Parser parser, const char *start,
2735 const char *end, const char **endPtr) {
2736 enum XML_Error result
2737 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2738 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2739 XML_ACCOUNT_ENTITY_EXPANSION);
2740 if (result == XML_ERROR_NONE) {
2741 if (! storeRawNames(parser))
2742 return XML_ERROR_NO_MEMORY;
2744 return result;
2747 static enum XML_Error
2748 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2749 const char *s, const char *end, const char **nextPtr,
2750 XML_Bool haveMore, enum XML_Account account) {
2751 /* save one level of indirection */
2752 DTD *const dtd = parser->m_dtd;
2754 const char **eventPP;
2755 const char **eventEndPP;
2756 if (enc == parser->m_encoding) {
2757 eventPP = &parser->m_eventPtr;
2758 eventEndPP = &parser->m_eventEndPtr;
2759 } else {
2760 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2761 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2763 *eventPP = s;
2765 for (;;) {
2766 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2767 int tok = XmlContentTok(enc, s, end, &next);
2768 #ifdef XML_DTD
2769 const char *accountAfter
2770 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2771 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2772 : next;
2773 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2774 account)) {
2775 accountingOnAbort(parser);
2776 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2778 #endif
2779 *eventEndPP = next;
2780 switch (tok) {
2781 case XML_TOK_TRAILING_CR:
2782 if (haveMore) {
2783 *nextPtr = s;
2784 return XML_ERROR_NONE;
2786 *eventEndPP = end;
2787 if (parser->m_characterDataHandler) {
2788 XML_Char c = 0xA;
2789 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2790 } else if (parser->m_defaultHandler)
2791 reportDefault(parser, enc, s, end);
2792 /* We are at the end of the final buffer, should we check for
2793 XML_SUSPENDED, XML_FINISHED?
2795 if (startTagLevel == 0)
2796 return XML_ERROR_NO_ELEMENTS;
2797 if (parser->m_tagLevel != startTagLevel)
2798 return XML_ERROR_ASYNC_ENTITY;
2799 *nextPtr = end;
2800 return XML_ERROR_NONE;
2801 case XML_TOK_NONE:
2802 if (haveMore) {
2803 *nextPtr = s;
2804 return XML_ERROR_NONE;
2806 if (startTagLevel > 0) {
2807 if (parser->m_tagLevel != startTagLevel)
2808 return XML_ERROR_ASYNC_ENTITY;
2809 *nextPtr = s;
2810 return XML_ERROR_NONE;
2812 return XML_ERROR_NO_ELEMENTS;
2813 case XML_TOK_INVALID:
2814 *eventPP = next;
2815 return XML_ERROR_INVALID_TOKEN;
2816 case XML_TOK_PARTIAL:
2817 if (haveMore) {
2818 *nextPtr = s;
2819 return XML_ERROR_NONE;
2821 return XML_ERROR_UNCLOSED_TOKEN;
2822 case XML_TOK_PARTIAL_CHAR:
2823 if (haveMore) {
2824 *nextPtr = s;
2825 return XML_ERROR_NONE;
2827 return XML_ERROR_PARTIAL_CHAR;
2828 case XML_TOK_ENTITY_REF: {
2829 const XML_Char *name;
2830 ENTITY *entity;
2831 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2832 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2833 if (ch) {
2834 #ifdef XML_DTD
2835 /* NOTE: We are replacing 4-6 characters original input for 1 character
2836 * so there is no amplification and hence recording without
2837 * protection. */
2838 accountingDiffTolerated(parser, tok, (char *)&ch,
2839 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2840 XML_ACCOUNT_ENTITY_EXPANSION);
2841 #endif /* XML_DTD */
2842 if (parser->m_characterDataHandler)
2843 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2844 else if (parser->m_defaultHandler)
2845 reportDefault(parser, enc, s, next);
2846 break;
2848 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2849 next - enc->minBytesPerChar);
2850 if (! name)
2851 return XML_ERROR_NO_MEMORY;
2852 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2853 poolDiscard(&dtd->pool);
2854 /* First, determine if a check for an existing declaration is needed;
2855 if yes, check that the entity exists, and that it is internal,
2856 otherwise call the skipped entity or default handler.
2858 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2859 if (! entity)
2860 return XML_ERROR_UNDEFINED_ENTITY;
2861 else if (! entity->is_internal)
2862 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2863 } else if (! entity) {
2864 if (parser->m_skippedEntityHandler)
2865 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2866 else if (parser->m_defaultHandler)
2867 reportDefault(parser, enc, s, next);
2868 break;
2870 if (entity->open)
2871 return XML_ERROR_RECURSIVE_ENTITY_REF;
2872 if (entity->notation)
2873 return XML_ERROR_BINARY_ENTITY_REF;
2874 if (entity->textPtr) {
2875 enum XML_Error result;
2876 if (! parser->m_defaultExpandInternalEntities) {
2877 if (parser->m_skippedEntityHandler)
2878 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2880 else if (parser->m_defaultHandler)
2881 reportDefault(parser, enc, s, next);
2882 break;
2884 result = processInternalEntity(parser, entity, XML_FALSE);
2885 if (result != XML_ERROR_NONE)
2886 return result;
2887 } else if (parser->m_externalEntityRefHandler) {
2888 const XML_Char *context;
2889 entity->open = XML_TRUE;
2890 context = getContext(parser);
2891 entity->open = XML_FALSE;
2892 if (! context)
2893 return XML_ERROR_NO_MEMORY;
2894 if (! parser->m_externalEntityRefHandler(
2895 parser->m_externalEntityRefHandlerArg, context, entity->base,
2896 entity->systemId, entity->publicId))
2897 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2898 poolDiscard(&parser->m_tempPool);
2899 } else if (parser->m_defaultHandler)
2900 reportDefault(parser, enc, s, next);
2901 break;
2903 case XML_TOK_START_TAG_NO_ATTS:
2904 /* fall through */
2905 case XML_TOK_START_TAG_WITH_ATTS: {
2906 TAG *tag;
2907 enum XML_Error result;
2908 XML_Char *toPtr;
2909 if (parser->m_freeTagList) {
2910 tag = parser->m_freeTagList;
2911 parser->m_freeTagList = parser->m_freeTagList->parent;
2912 } else {
2913 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2914 if (! tag)
2915 return XML_ERROR_NO_MEMORY;
2916 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2917 if (! tag->buf) {
2918 FREE(parser, tag);
2919 return XML_ERROR_NO_MEMORY;
2921 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2923 tag->bindings = NULL;
2924 tag->parent = parser->m_tagStack;
2925 parser->m_tagStack = tag;
2926 tag->name.localPart = NULL;
2927 tag->name.prefix = NULL;
2928 tag->rawName = s + enc->minBytesPerChar;
2929 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2930 ++parser->m_tagLevel;
2932 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2933 const char *fromPtr = tag->rawName;
2934 toPtr = (XML_Char *)tag->buf;
2935 for (;;) {
2936 int bufSize;
2937 int convLen;
2938 const enum XML_Convert_Result convert_res
2939 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
2940 (ICHAR *)tag->bufEnd - 1);
2941 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2942 if ((fromPtr >= rawNameEnd)
2943 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2944 tag->name.strLen = convLen;
2945 break;
2947 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2949 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2950 if (temp == NULL)
2951 return XML_ERROR_NO_MEMORY;
2952 tag->buf = temp;
2953 tag->bufEnd = temp + bufSize;
2954 toPtr = (XML_Char *)temp + convLen;
2958 tag->name.str = (XML_Char *)tag->buf;
2959 *toPtr = XML_T('\0');
2960 result
2961 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
2962 if (result)
2963 return result;
2964 if (parser->m_startElementHandler)
2965 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2966 (const XML_Char **)parser->m_atts);
2967 else if (parser->m_defaultHandler)
2968 reportDefault(parser, enc, s, next);
2969 poolClear(&parser->m_tempPool);
2970 break;
2972 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2973 /* fall through */
2974 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
2975 const char *rawName = s + enc->minBytesPerChar;
2976 enum XML_Error result;
2977 BINDING *bindings = NULL;
2978 XML_Bool noElmHandlers = XML_TRUE;
2979 TAG_NAME name;
2980 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2981 rawName + XmlNameLength(enc, rawName));
2982 if (! name.str)
2983 return XML_ERROR_NO_MEMORY;
2984 poolFinish(&parser->m_tempPool);
2985 result = storeAtts(parser, enc, s, &name, &bindings,
2986 XML_ACCOUNT_NONE /* token spans whole start tag */);
2987 if (result != XML_ERROR_NONE) {
2988 freeBindings(parser, bindings);
2989 return result;
2991 poolFinish(&parser->m_tempPool);
2992 if (parser->m_startElementHandler) {
2993 parser->m_startElementHandler(parser->m_handlerArg, name.str,
2994 (const XML_Char **)parser->m_atts);
2995 noElmHandlers = XML_FALSE;
2997 if (parser->m_endElementHandler) {
2998 if (parser->m_startElementHandler)
2999 *eventPP = *eventEndPP;
3000 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3001 noElmHandlers = XML_FALSE;
3003 if (noElmHandlers && parser->m_defaultHandler)
3004 reportDefault(parser, enc, s, next);
3005 poolClear(&parser->m_tempPool);
3006 freeBindings(parser, bindings);
3008 if ((parser->m_tagLevel == 0)
3009 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3010 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3011 parser->m_processor = epilogProcessor;
3012 else
3013 return epilogProcessor(parser, next, end, nextPtr);
3015 break;
3016 case XML_TOK_END_TAG:
3017 if (parser->m_tagLevel == startTagLevel)
3018 return XML_ERROR_ASYNC_ENTITY;
3019 else {
3020 int len;
3021 const char *rawName;
3022 TAG *tag = parser->m_tagStack;
3023 rawName = s + enc->minBytesPerChar * 2;
3024 len = XmlNameLength(enc, rawName);
3025 if (len != tag->rawNameLength
3026 || memcmp(tag->rawName, rawName, len) != 0) {
3027 *eventPP = rawName;
3028 return XML_ERROR_TAG_MISMATCH;
3030 parser->m_tagStack = tag->parent;
3031 tag->parent = parser->m_freeTagList;
3032 parser->m_freeTagList = tag;
3033 --parser->m_tagLevel;
3034 if (parser->m_endElementHandler) {
3035 const XML_Char *localPart;
3036 const XML_Char *prefix;
3037 XML_Char *uri;
3038 localPart = tag->name.localPart;
3039 if (parser->m_ns && localPart) {
3040 /* localPart and prefix may have been overwritten in
3041 tag->name.str, since this points to the binding->uri
3042 buffer which gets re-used; so we have to add them again
3044 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3045 /* don't need to check for space - already done in storeAtts() */
3046 while (*localPart)
3047 *uri++ = *localPart++;
3048 prefix = (XML_Char *)tag->name.prefix;
3049 if (parser->m_ns_triplets && prefix) {
3050 *uri++ = parser->m_namespaceSeparator;
3051 while (*prefix)
3052 *uri++ = *prefix++;
3054 *uri = XML_T('\0');
3056 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3057 } else if (parser->m_defaultHandler)
3058 reportDefault(parser, enc, s, next);
3059 while (tag->bindings) {
3060 BINDING *b = tag->bindings;
3061 if (parser->m_endNamespaceDeclHandler)
3062 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3063 b->prefix->name);
3064 tag->bindings = tag->bindings->nextTagBinding;
3065 b->nextTagBinding = parser->m_freeBindingList;
3066 parser->m_freeBindingList = b;
3067 b->prefix->binding = b->prevPrefixBinding;
3069 if ((parser->m_tagLevel == 0)
3070 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3071 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3072 parser->m_processor = epilogProcessor;
3073 else
3074 return epilogProcessor(parser, next, end, nextPtr);
3077 break;
3078 case XML_TOK_CHAR_REF: {
3079 int n = XmlCharRefNumber(enc, s);
3080 if (n < 0)
3081 return XML_ERROR_BAD_CHAR_REF;
3082 if (parser->m_characterDataHandler) {
3083 XML_Char buf[XML_ENCODE_MAX];
3084 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3085 XmlEncode(n, (ICHAR *)buf));
3086 } else if (parser->m_defaultHandler)
3087 reportDefault(parser, enc, s, next);
3088 } break;
3089 case XML_TOK_XML_DECL:
3090 return XML_ERROR_MISPLACED_XML_PI;
3091 case XML_TOK_DATA_NEWLINE:
3092 if (parser->m_characterDataHandler) {
3093 XML_Char c = 0xA;
3094 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3095 } else if (parser->m_defaultHandler)
3096 reportDefault(parser, enc, s, next);
3097 break;
3098 case XML_TOK_CDATA_SECT_OPEN: {
3099 enum XML_Error result;
3100 if (parser->m_startCdataSectionHandler)
3101 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3102 /* BEGIN disabled code */
3103 /* Suppose you doing a transformation on a document that involves
3104 changing only the character data. You set up a defaultHandler
3105 and a characterDataHandler. The defaultHandler simply copies
3106 characters through. The characterDataHandler does the
3107 transformation and writes the characters out escaping them as
3108 necessary. This case will fail to work if we leave out the
3109 following two lines (because & and < inside CDATA sections will
3110 be incorrectly escaped).
3112 However, now we have a start/endCdataSectionHandler, so it seems
3113 easier to let the user deal with this.
3115 else if (0 && parser->m_characterDataHandler)
3116 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3118 /* END disabled code */
3119 else if (parser->m_defaultHandler)
3120 reportDefault(parser, enc, s, next);
3121 result
3122 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3123 if (result != XML_ERROR_NONE)
3124 return result;
3125 else if (! next) {
3126 parser->m_processor = cdataSectionProcessor;
3127 return result;
3129 } break;
3130 case XML_TOK_TRAILING_RSQB:
3131 if (haveMore) {
3132 *nextPtr = s;
3133 return XML_ERROR_NONE;
3135 if (parser->m_characterDataHandler) {
3136 if (MUST_CONVERT(enc, s)) {
3137 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3138 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3139 parser->m_characterDataHandler(
3140 parser->m_handlerArg, parser->m_dataBuf,
3141 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3142 } else
3143 parser->m_characterDataHandler(
3144 parser->m_handlerArg, (XML_Char *)s,
3145 (int)((XML_Char *)end - (XML_Char *)s));
3146 } else if (parser->m_defaultHandler)
3147 reportDefault(parser, enc, s, end);
3148 /* We are at the end of the final buffer, should we check for
3149 XML_SUSPENDED, XML_FINISHED?
3151 if (startTagLevel == 0) {
3152 *eventPP = end;
3153 return XML_ERROR_NO_ELEMENTS;
3155 if (parser->m_tagLevel != startTagLevel) {
3156 *eventPP = end;
3157 return XML_ERROR_ASYNC_ENTITY;
3159 *nextPtr = end;
3160 return XML_ERROR_NONE;
3161 case XML_TOK_DATA_CHARS: {
3162 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3163 if (charDataHandler) {
3164 if (MUST_CONVERT(enc, s)) {
3165 for (;;) {
3166 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3167 const enum XML_Convert_Result convert_res = XmlConvert(
3168 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3169 *eventEndPP = s;
3170 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3171 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3172 if ((convert_res == XML_CONVERT_COMPLETED)
3173 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3174 break;
3175 *eventPP = s;
3177 } else
3178 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3179 (int)((XML_Char *)next - (XML_Char *)s));
3180 } else if (parser->m_defaultHandler)
3181 reportDefault(parser, enc, s, next);
3182 } break;
3183 case XML_TOK_PI:
3184 if (! reportProcessingInstruction(parser, enc, s, next))
3185 return XML_ERROR_NO_MEMORY;
3186 break;
3187 case XML_TOK_COMMENT:
3188 if (! reportComment(parser, enc, s, next))
3189 return XML_ERROR_NO_MEMORY;
3190 break;
3191 default:
3192 /* All of the tokens produced by XmlContentTok() have their own
3193 * explicit cases, so this default is not strictly necessary.
3194 * However it is a useful safety net, so we retain the code and
3195 * simply exclude it from the coverage tests.
3197 * LCOV_EXCL_START
3199 if (parser->m_defaultHandler)
3200 reportDefault(parser, enc, s, next);
3201 break;
3202 /* LCOV_EXCL_STOP */
3204 *eventPP = s = next;
3205 switch (parser->m_parsingStatus.parsing) {
3206 case XML_SUSPENDED:
3207 *nextPtr = next;
3208 return XML_ERROR_NONE;
3209 case XML_FINISHED:
3210 return XML_ERROR_ABORTED;
3211 default:;
3214 /* not reached */
3217 /* This function does not call free() on the allocated memory, merely
3218 * moving it to the parser's m_freeBindingList where it can be freed or
3219 * reused as appropriate.
3221 static void
3222 freeBindings(XML_Parser parser, BINDING *bindings) {
3223 while (bindings) {
3224 BINDING *b = bindings;
3226 /* m_startNamespaceDeclHandler will have been called for this
3227 * binding in addBindings(), so call the end handler now.
3229 if (parser->m_endNamespaceDeclHandler)
3230 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3232 bindings = bindings->nextTagBinding;
3233 b->nextTagBinding = parser->m_freeBindingList;
3234 parser->m_freeBindingList = b;
3235 b->prefix->binding = b->prevPrefixBinding;
3239 /* Precondition: all arguments must be non-NULL;
3240 Purpose:
3241 - normalize attributes
3242 - check attributes for well-formedness
3243 - generate namespace aware attribute names (URI, prefix)
3244 - build list of attributes for startElementHandler
3245 - default attributes
3246 - process namespace declarations (check and report them)
3247 - generate namespace aware element name (URI, prefix)
3249 static enum XML_Error
3250 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3251 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3252 enum XML_Account account) {
3253 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3254 ELEMENT_TYPE *elementType;
3255 int nDefaultAtts;
3256 const XML_Char **appAtts; /* the attribute list for the application */
3257 int attIndex = 0;
3258 int prefixLen;
3259 int i;
3260 int n;
3261 XML_Char *uri;
3262 int nPrefixes = 0;
3263 BINDING *binding;
3264 const XML_Char *localPart;
3266 /* lookup the element type name */
3267 elementType
3268 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3269 if (! elementType) {
3270 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3271 if (! name)
3272 return XML_ERROR_NO_MEMORY;
3273 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3274 sizeof(ELEMENT_TYPE));
3275 if (! elementType)
3276 return XML_ERROR_NO_MEMORY;
3277 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3278 return XML_ERROR_NO_MEMORY;
3280 nDefaultAtts = elementType->nDefaultAtts;
3282 /* get the attributes from the tokenizer */
3283 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3285 /* Detect and prevent integer overflow */
3286 if (n > INT_MAX - nDefaultAtts) {
3287 return XML_ERROR_NO_MEMORY;
3290 if (n + nDefaultAtts > parser->m_attsSize) {
3291 int oldAttsSize = parser->m_attsSize;
3292 ATTRIBUTE *temp;
3293 #ifdef XML_ATTR_INFO
3294 XML_AttrInfo *temp2;
3295 #endif
3297 /* Detect and prevent integer overflow */
3298 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3299 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3300 return XML_ERROR_NO_MEMORY;
3303 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3305 /* Detect and prevent integer overflow.
3306 * The preprocessor guard addresses the "always false" warning
3307 * from -Wtype-limits on platforms where
3308 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3309 #if UINT_MAX >= SIZE_MAX
3310 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3311 parser->m_attsSize = oldAttsSize;
3312 return XML_ERROR_NO_MEMORY;
3314 #endif
3316 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3317 parser->m_attsSize * sizeof(ATTRIBUTE));
3318 if (temp == NULL) {
3319 parser->m_attsSize = oldAttsSize;
3320 return XML_ERROR_NO_MEMORY;
3322 parser->m_atts = temp;
3323 #ifdef XML_ATTR_INFO
3324 /* Detect and prevent integer overflow.
3325 * The preprocessor guard addresses the "always false" warning
3326 * from -Wtype-limits on platforms where
3327 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3328 # if UINT_MAX >= SIZE_MAX
3329 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3330 parser->m_attsSize = oldAttsSize;
3331 return XML_ERROR_NO_MEMORY;
3333 # endif
3335 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3336 parser->m_attsSize * sizeof(XML_AttrInfo));
3337 if (temp2 == NULL) {
3338 parser->m_attsSize = oldAttsSize;
3339 return XML_ERROR_NO_MEMORY;
3341 parser->m_attInfo = temp2;
3342 #endif
3343 if (n > oldAttsSize)
3344 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3347 appAtts = (const XML_Char **)parser->m_atts;
3348 for (i = 0; i < n; i++) {
3349 ATTRIBUTE *currAtt = &parser->m_atts[i];
3350 #ifdef XML_ATTR_INFO
3351 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3352 #endif
3353 /* add the name and value to the attribute list */
3354 ATTRIBUTE_ID *attId
3355 = getAttributeId(parser, enc, currAtt->name,
3356 currAtt->name + XmlNameLength(enc, currAtt->name));
3357 if (! attId)
3358 return XML_ERROR_NO_MEMORY;
3359 #ifdef XML_ATTR_INFO
3360 currAttInfo->nameStart
3361 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3362 currAttInfo->nameEnd
3363 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3364 currAttInfo->valueStart = parser->m_parseEndByteIndex
3365 - (parser->m_parseEndPtr - currAtt->valuePtr);
3366 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3367 - (parser->m_parseEndPtr - currAtt->valueEnd);
3368 #endif
3369 /* Detect duplicate attributes by their QNames. This does not work when
3370 namespace processing is turned on and different prefixes for the same
3371 namespace are used. For this case we have a check further down.
3373 if ((attId->name)[-1]) {
3374 if (enc == parser->m_encoding)
3375 parser->m_eventPtr = parser->m_atts[i].name;
3376 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3378 (attId->name)[-1] = 1;
3379 appAtts[attIndex++] = attId->name;
3380 if (! parser->m_atts[i].normalized) {
3381 enum XML_Error result;
3382 XML_Bool isCdata = XML_TRUE;
3384 /* figure out whether declared as other than CDATA */
3385 if (attId->maybeTokenized) {
3386 int j;
3387 for (j = 0; j < nDefaultAtts; j++) {
3388 if (attId == elementType->defaultAtts[j].id) {
3389 isCdata = elementType->defaultAtts[j].isCdata;
3390 break;
3395 /* normalize the attribute value */
3396 result = storeAttributeValue(
3397 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3398 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3399 if (result)
3400 return result;
3401 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3402 poolFinish(&parser->m_tempPool);
3403 } else {
3404 /* the value did not need normalizing */
3405 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3406 parser->m_atts[i].valuePtr,
3407 parser->m_atts[i].valueEnd);
3408 if (appAtts[attIndex] == 0)
3409 return XML_ERROR_NO_MEMORY;
3410 poolFinish(&parser->m_tempPool);
3412 /* handle prefixed attribute names */
3413 if (attId->prefix) {
3414 if (attId->xmlns) {
3415 /* deal with namespace declarations here */
3416 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3417 appAtts[attIndex], bindingsPtr);
3418 if (result)
3419 return result;
3420 --attIndex;
3421 } else {
3422 /* deal with other prefixed names later */
3423 attIndex++;
3424 nPrefixes++;
3425 (attId->name)[-1] = 2;
3427 } else
3428 attIndex++;
3431 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3432 parser->m_nSpecifiedAtts = attIndex;
3433 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3434 for (i = 0; i < attIndex; i += 2)
3435 if (appAtts[i] == elementType->idAtt->name) {
3436 parser->m_idAttIndex = i;
3437 break;
3439 } else
3440 parser->m_idAttIndex = -1;
3442 /* do attribute defaulting */
3443 for (i = 0; i < nDefaultAtts; i++) {
3444 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3445 if (! (da->id->name)[-1] && da->value) {
3446 if (da->id->prefix) {
3447 if (da->id->xmlns) {
3448 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3449 da->value, bindingsPtr);
3450 if (result)
3451 return result;
3452 } else {
3453 (da->id->name)[-1] = 2;
3454 nPrefixes++;
3455 appAtts[attIndex++] = da->id->name;
3456 appAtts[attIndex++] = da->value;
3458 } else {
3459 (da->id->name)[-1] = 1;
3460 appAtts[attIndex++] = da->id->name;
3461 appAtts[attIndex++] = da->value;
3465 appAtts[attIndex] = 0;
3467 /* expand prefixed attribute names, check for duplicates,
3468 and clear flags that say whether attributes were specified */
3469 i = 0;
3470 if (nPrefixes) {
3471 int j; /* hash table index */
3472 unsigned long version = parser->m_nsAttsVersion;
3474 /* Detect and prevent invalid shift */
3475 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3476 return XML_ERROR_NO_MEMORY;
3479 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3480 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3481 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3482 if ((nPrefixes << 1)
3483 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3484 NS_ATT *temp;
3485 /* hash table size must also be a power of 2 and >= 8 */
3486 while (nPrefixes >> parser->m_nsAttsPower++)
3488 if (parser->m_nsAttsPower < 3)
3489 parser->m_nsAttsPower = 3;
3491 /* Detect and prevent invalid shift */
3492 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3493 /* Restore actual size of memory in m_nsAtts */
3494 parser->m_nsAttsPower = oldNsAttsPower;
3495 return XML_ERROR_NO_MEMORY;
3498 nsAttsSize = 1u << parser->m_nsAttsPower;
3500 /* Detect and prevent integer overflow.
3501 * The preprocessor guard addresses the "always false" warning
3502 * from -Wtype-limits on platforms where
3503 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3504 #if UINT_MAX >= SIZE_MAX
3505 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3506 /* Restore actual size of memory in m_nsAtts */
3507 parser->m_nsAttsPower = oldNsAttsPower;
3508 return XML_ERROR_NO_MEMORY;
3510 #endif
3512 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3513 nsAttsSize * sizeof(NS_ATT));
3514 if (! temp) {
3515 /* Restore actual size of memory in m_nsAtts */
3516 parser->m_nsAttsPower = oldNsAttsPower;
3517 return XML_ERROR_NO_MEMORY;
3519 parser->m_nsAtts = temp;
3520 version = 0; /* force re-initialization of m_nsAtts hash table */
3522 /* using a version flag saves us from initializing m_nsAtts every time */
3523 if (! version) { /* initialize version flags when version wraps around */
3524 version = INIT_ATTS_VERSION;
3525 for (j = nsAttsSize; j != 0;)
3526 parser->m_nsAtts[--j].version = version;
3528 parser->m_nsAttsVersion = --version;
3530 /* expand prefixed names and check for duplicates */
3531 for (; i < attIndex; i += 2) {
3532 const XML_Char *s = appAtts[i];
3533 if (s[-1] == 2) { /* prefixed */
3534 ATTRIBUTE_ID *id;
3535 const BINDING *b;
3536 unsigned long uriHash;
3537 struct siphash sip_state;
3538 struct sipkey sip_key;
3540 copy_salt_to_sipkey(parser, &sip_key);
3541 sip24_init(&sip_state, &sip_key);
3543 ((XML_Char *)s)[-1] = 0; /* clear flag */
3544 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3545 if (! id || ! id->prefix) {
3546 /* This code is walking through the appAtts array, dealing
3547 * with (in this case) a prefixed attribute name. To be in
3548 * the array, the attribute must have already been bound, so
3549 * has to have passed through the hash table lookup once
3550 * already. That implies that an entry for it already
3551 * exists, so the lookup above will return a pointer to
3552 * already allocated memory. There is no opportunaity for
3553 * the allocator to fail, so the condition above cannot be
3554 * fulfilled.
3556 * Since it is difficult to be certain that the above
3557 * analysis is complete, we retain the test and merely
3558 * remove the code from coverage tests.
3560 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3562 b = id->prefix->binding;
3563 if (! b)
3564 return XML_ERROR_UNBOUND_PREFIX;
3566 for (j = 0; j < b->uriLen; j++) {
3567 const XML_Char c = b->uri[j];
3568 if (! poolAppendChar(&parser->m_tempPool, c))
3569 return XML_ERROR_NO_MEMORY;
3572 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3574 while (*s++ != XML_T(ASCII_COLON))
3577 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3579 do { /* copies null terminator */
3580 if (! poolAppendChar(&parser->m_tempPool, *s))
3581 return XML_ERROR_NO_MEMORY;
3582 } while (*s++);
3584 uriHash = (unsigned long)sip24_final(&sip_state);
3586 { /* Check hash table for duplicate of expanded name (uriName).
3587 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3589 unsigned char step = 0;
3590 unsigned long mask = nsAttsSize - 1;
3591 j = uriHash & mask; /* index into hash table */
3592 while (parser->m_nsAtts[j].version == version) {
3593 /* for speed we compare stored hash values first */
3594 if (uriHash == parser->m_nsAtts[j].hash) {
3595 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3596 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3597 /* s1 is null terminated, but not s2 */
3598 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3600 if (*s1 == 0)
3601 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3603 if (! step)
3604 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3605 j < step ? (j += nsAttsSize - step) : (j -= step);
3609 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3610 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3611 s = b->prefix->name;
3612 do {
3613 if (! poolAppendChar(&parser->m_tempPool, *s))
3614 return XML_ERROR_NO_MEMORY;
3615 } while (*s++);
3618 /* store expanded name in attribute list */
3619 s = poolStart(&parser->m_tempPool);
3620 poolFinish(&parser->m_tempPool);
3621 appAtts[i] = s;
3623 /* fill empty slot with new version, uriName and hash value */
3624 parser->m_nsAtts[j].version = version;
3625 parser->m_nsAtts[j].hash = uriHash;
3626 parser->m_nsAtts[j].uriName = s;
3628 if (! --nPrefixes) {
3629 i += 2;
3630 break;
3632 } else /* not prefixed */
3633 ((XML_Char *)s)[-1] = 0; /* clear flag */
3636 /* clear flags for the remaining attributes */
3637 for (; i < attIndex; i += 2)
3638 ((XML_Char *)(appAtts[i]))[-1] = 0;
3639 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3640 binding->attId->name[-1] = 0;
3642 if (! parser->m_ns)
3643 return XML_ERROR_NONE;
3645 /* expand the element type name */
3646 if (elementType->prefix) {
3647 binding = elementType->prefix->binding;
3648 if (! binding)
3649 return XML_ERROR_UNBOUND_PREFIX;
3650 localPart = tagNamePtr->str;
3651 while (*localPart++ != XML_T(ASCII_COLON))
3653 } else if (dtd->defaultPrefix.binding) {
3654 binding = dtd->defaultPrefix.binding;
3655 localPart = tagNamePtr->str;
3656 } else
3657 return XML_ERROR_NONE;
3658 prefixLen = 0;
3659 if (parser->m_ns_triplets && binding->prefix->name) {
3660 for (; binding->prefix->name[prefixLen++];)
3661 ; /* prefixLen includes null terminator */
3663 tagNamePtr->localPart = localPart;
3664 tagNamePtr->uriLen = binding->uriLen;
3665 tagNamePtr->prefix = binding->prefix->name;
3666 tagNamePtr->prefixLen = prefixLen;
3667 for (i = 0; localPart[i++];)
3668 ; /* i includes null terminator */
3670 /* Detect and prevent integer overflow */
3671 if (binding->uriLen > INT_MAX - prefixLen
3672 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3673 return XML_ERROR_NO_MEMORY;
3676 n = i + binding->uriLen + prefixLen;
3677 if (n > binding->uriAlloc) {
3678 TAG *p;
3680 /* Detect and prevent integer overflow */
3681 if (n > INT_MAX - EXPAND_SPARE) {
3682 return XML_ERROR_NO_MEMORY;
3684 /* Detect and prevent integer overflow.
3685 * The preprocessor guard addresses the "always false" warning
3686 * from -Wtype-limits on platforms where
3687 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3688 #if UINT_MAX >= SIZE_MAX
3689 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3690 return XML_ERROR_NO_MEMORY;
3692 #endif
3694 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3695 if (! uri)
3696 return XML_ERROR_NO_MEMORY;
3697 binding->uriAlloc = n + EXPAND_SPARE;
3698 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3699 for (p = parser->m_tagStack; p; p = p->parent)
3700 if (p->name.str == binding->uri)
3701 p->name.str = uri;
3702 FREE(parser, binding->uri);
3703 binding->uri = uri;
3705 /* if m_namespaceSeparator != '\0' then uri includes it already */
3706 uri = binding->uri + binding->uriLen;
3707 memcpy(uri, localPart, i * sizeof(XML_Char));
3708 /* we always have a namespace separator between localPart and prefix */
3709 if (prefixLen) {
3710 uri += i - 1;
3711 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3712 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3714 tagNamePtr->str = binding->uri;
3715 return XML_ERROR_NONE;
3718 static XML_Bool
3719 is_rfc3986_uri_char(XML_Char candidate) {
3720 // For the RFC 3986 ANBF grammar see
3721 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3723 switch (candidate) {
3724 // From rule "ALPHA" (uppercase half)
3725 case 'A':
3726 case 'B':
3727 case 'C':
3728 case 'D':
3729 case 'E':
3730 case 'F':
3731 case 'G':
3732 case 'H':
3733 case 'I':
3734 case 'J':
3735 case 'K':
3736 case 'L':
3737 case 'M':
3738 case 'N':
3739 case 'O':
3740 case 'P':
3741 case 'Q':
3742 case 'R':
3743 case 'S':
3744 case 'T':
3745 case 'U':
3746 case 'V':
3747 case 'W':
3748 case 'X':
3749 case 'Y':
3750 case 'Z':
3752 // From rule "ALPHA" (lowercase half)
3753 case 'a':
3754 case 'b':
3755 case 'c':
3756 case 'd':
3757 case 'e':
3758 case 'f':
3759 case 'g':
3760 case 'h':
3761 case 'i':
3762 case 'j':
3763 case 'k':
3764 case 'l':
3765 case 'm':
3766 case 'n':
3767 case 'o':
3768 case 'p':
3769 case 'q':
3770 case 'r':
3771 case 's':
3772 case 't':
3773 case 'u':
3774 case 'v':
3775 case 'w':
3776 case 'x':
3777 case 'y':
3778 case 'z':
3780 // From rule "DIGIT"
3781 case '0':
3782 case '1':
3783 case '2':
3784 case '3':
3785 case '4':
3786 case '5':
3787 case '6':
3788 case '7':
3789 case '8':
3790 case '9':
3792 // From rule "pct-encoded"
3793 case '%':
3795 // From rule "unreserved"
3796 case '-':
3797 case '.':
3798 case '_':
3799 case '~':
3801 // From rule "gen-delims"
3802 case ':':
3803 case '/':
3804 case '?':
3805 case '#':
3806 case '[':
3807 case ']':
3808 case '@':
3810 // From rule "sub-delims"
3811 case '!':
3812 case '$':
3813 case '&':
3814 case '\'':
3815 case '(':
3816 case ')':
3817 case '*':
3818 case '+':
3819 case ',':
3820 case ';':
3821 case '=':
3822 return XML_TRUE;
3824 default:
3825 return XML_FALSE;
3829 /* addBinding() overwrites the value of prefix->binding without checking.
3830 Therefore one must keep track of the old value outside of addBinding().
3832 static enum XML_Error
3833 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3834 const XML_Char *uri, BINDING **bindingsPtr) {
3835 // "http://www.w3.org/XML/1998/namespace"
3836 static const XML_Char xmlNamespace[]
3837 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3838 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3839 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3840 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3841 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3842 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3843 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3844 ASCII_e, '\0'};
3845 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3846 // "http://www.w3.org/2000/xmlns/"
3847 static const XML_Char xmlnsNamespace[]
3848 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3849 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3850 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3851 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3852 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3853 static const int xmlnsLen
3854 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3856 XML_Bool mustBeXML = XML_FALSE;
3857 XML_Bool isXML = XML_TRUE;
3858 XML_Bool isXMLNS = XML_TRUE;
3860 BINDING *b;
3861 int len;
3863 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3864 if (*uri == XML_T('\0') && prefix->name)
3865 return XML_ERROR_UNDECLARING_PREFIX;
3867 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3868 && prefix->name[1] == XML_T(ASCII_m)
3869 && prefix->name[2] == XML_T(ASCII_l)) {
3870 /* Not allowed to bind xmlns */
3871 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3872 && prefix->name[5] == XML_T('\0'))
3873 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3875 if (prefix->name[3] == XML_T('\0'))
3876 mustBeXML = XML_TRUE;
3879 for (len = 0; uri[len]; len++) {
3880 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3881 isXML = XML_FALSE;
3883 if (! mustBeXML && isXMLNS
3884 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3885 isXMLNS = XML_FALSE;
3887 // NOTE: While Expat does not validate namespace URIs against RFC 3986
3888 // today (and is not REQUIRED to do so with regard to the XML 1.0
3889 // namespaces specification) we have to at least make sure, that
3890 // the application on top of Expat (that is likely splitting expanded
3891 // element names ("qualified names") of form
3892 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3893 // in its element handler code) cannot be confused by an attacker
3894 // putting additional namespace separator characters into namespace
3895 // declarations. That would be ambiguous and not to be expected.
3897 // While the HTML API docs of function XML_ParserCreateNS have been
3898 // advising against use of a namespace separator character that can
3899 // appear in a URI for >20 years now, some widespread applications
3900 // are using URI characters (':' (colon) in particular) for a
3901 // namespace separator, in practice. To keep these applications
3902 // functional, we only reject namespaces URIs containing the
3903 // application-chosen namespace separator if the chosen separator
3904 // is a non-URI character with regard to RFC 3986.
3905 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
3906 && ! is_rfc3986_uri_char(uri[len])) {
3907 return XML_ERROR_SYNTAX;
3910 isXML = isXML && len == xmlLen;
3911 isXMLNS = isXMLNS && len == xmlnsLen;
3913 if (mustBeXML != isXML)
3914 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3915 : XML_ERROR_RESERVED_NAMESPACE_URI;
3917 if (isXMLNS)
3918 return XML_ERROR_RESERVED_NAMESPACE_URI;
3920 if (parser->m_namespaceSeparator)
3921 len++;
3922 if (parser->m_freeBindingList) {
3923 b = parser->m_freeBindingList;
3924 if (len > b->uriAlloc) {
3925 /* Detect and prevent integer overflow */
3926 if (len > INT_MAX - EXPAND_SPARE) {
3927 return XML_ERROR_NO_MEMORY;
3930 /* Detect and prevent integer overflow.
3931 * The preprocessor guard addresses the "always false" warning
3932 * from -Wtype-limits on platforms where
3933 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3934 #if UINT_MAX >= SIZE_MAX
3935 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3936 return XML_ERROR_NO_MEMORY;
3938 #endif
3940 XML_Char *temp = (XML_Char *)REALLOC(
3941 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
3942 if (temp == NULL)
3943 return XML_ERROR_NO_MEMORY;
3944 b->uri = temp;
3945 b->uriAlloc = len + EXPAND_SPARE;
3947 parser->m_freeBindingList = b->nextTagBinding;
3948 } else {
3949 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3950 if (! b)
3951 return XML_ERROR_NO_MEMORY;
3953 /* Detect and prevent integer overflow */
3954 if (len > INT_MAX - EXPAND_SPARE) {
3955 return XML_ERROR_NO_MEMORY;
3957 /* Detect and prevent integer overflow.
3958 * The preprocessor guard addresses the "always false" warning
3959 * from -Wtype-limits on platforms where
3960 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3961 #if UINT_MAX >= SIZE_MAX
3962 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3963 return XML_ERROR_NO_MEMORY;
3965 #endif
3967 b->uri
3968 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3969 if (! b->uri) {
3970 FREE(parser, b);
3971 return XML_ERROR_NO_MEMORY;
3973 b->uriAlloc = len + EXPAND_SPARE;
3975 b->uriLen = len;
3976 memcpy(b->uri, uri, len * sizeof(XML_Char));
3977 if (parser->m_namespaceSeparator)
3978 b->uri[len - 1] = parser->m_namespaceSeparator;
3979 b->prefix = prefix;
3980 b->attId = attId;
3981 b->prevPrefixBinding = prefix->binding;
3982 /* NULL binding when default namespace undeclared */
3983 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3984 prefix->binding = NULL;
3985 else
3986 prefix->binding = b;
3987 b->nextTagBinding = *bindingsPtr;
3988 *bindingsPtr = b;
3989 /* if attId == NULL then we are not starting a namespace scope */
3990 if (attId && parser->m_startNamespaceDeclHandler)
3991 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3992 prefix->binding ? uri : 0);
3993 return XML_ERROR_NONE;
3996 /* The idea here is to avoid using stack for each CDATA section when
3997 the whole file is parsed with one call.
3999 static enum XML_Error PTRCALL
4000 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4001 const char **endPtr) {
4002 enum XML_Error result = doCdataSection(
4003 parser, parser->m_encoding, &start, end, endPtr,
4004 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4005 if (result != XML_ERROR_NONE)
4006 return result;
4007 if (start) {
4008 if (parser->m_parentParser) { /* we are parsing an external entity */
4009 parser->m_processor = externalEntityContentProcessor;
4010 return externalEntityContentProcessor(parser, start, end, endPtr);
4011 } else {
4012 parser->m_processor = contentProcessor;
4013 return contentProcessor(parser, start, end, endPtr);
4016 return result;
4019 /* startPtr gets set to non-null if the section is closed, and to null if
4020 the section is not yet closed.
4022 static enum XML_Error
4023 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4024 const char *end, const char **nextPtr, XML_Bool haveMore,
4025 enum XML_Account account) {
4026 const char *s = *startPtr;
4027 const char **eventPP;
4028 const char **eventEndPP;
4029 if (enc == parser->m_encoding) {
4030 eventPP = &parser->m_eventPtr;
4031 *eventPP = s;
4032 eventEndPP = &parser->m_eventEndPtr;
4033 } else {
4034 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4035 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4037 *eventPP = s;
4038 *startPtr = NULL;
4040 for (;;) {
4041 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4042 int tok = XmlCdataSectionTok(enc, s, end, &next);
4043 #ifdef XML_DTD
4044 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4045 accountingOnAbort(parser);
4046 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4048 #else
4049 UNUSED_P(account);
4050 #endif
4051 *eventEndPP = next;
4052 switch (tok) {
4053 case XML_TOK_CDATA_SECT_CLOSE:
4054 if (parser->m_endCdataSectionHandler)
4055 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4056 /* BEGIN disabled code */
4057 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4058 else if (0 && parser->m_characterDataHandler)
4059 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4061 /* END disabled code */
4062 else if (parser->m_defaultHandler)
4063 reportDefault(parser, enc, s, next);
4064 *startPtr = next;
4065 *nextPtr = next;
4066 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4067 return XML_ERROR_ABORTED;
4068 else
4069 return XML_ERROR_NONE;
4070 case XML_TOK_DATA_NEWLINE:
4071 if (parser->m_characterDataHandler) {
4072 XML_Char c = 0xA;
4073 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4074 } else if (parser->m_defaultHandler)
4075 reportDefault(parser, enc, s, next);
4076 break;
4077 case XML_TOK_DATA_CHARS: {
4078 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4079 if (charDataHandler) {
4080 if (MUST_CONVERT(enc, s)) {
4081 for (;;) {
4082 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4083 const enum XML_Convert_Result convert_res = XmlConvert(
4084 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4085 *eventEndPP = next;
4086 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4087 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4088 if ((convert_res == XML_CONVERT_COMPLETED)
4089 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4090 break;
4091 *eventPP = s;
4093 } else
4094 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
4095 (int)((XML_Char *)next - (XML_Char *)s));
4096 } else if (parser->m_defaultHandler)
4097 reportDefault(parser, enc, s, next);
4098 } break;
4099 case XML_TOK_INVALID:
4100 *eventPP = next;
4101 return XML_ERROR_INVALID_TOKEN;
4102 case XML_TOK_PARTIAL_CHAR:
4103 if (haveMore) {
4104 *nextPtr = s;
4105 return XML_ERROR_NONE;
4107 return XML_ERROR_PARTIAL_CHAR;
4108 case XML_TOK_PARTIAL:
4109 case XML_TOK_NONE:
4110 if (haveMore) {
4111 *nextPtr = s;
4112 return XML_ERROR_NONE;
4114 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4115 default:
4116 /* Every token returned by XmlCdataSectionTok() has its own
4117 * explicit case, so this default case will never be executed.
4118 * We retain it as a safety net and exclude it from the coverage
4119 * statistics.
4121 * LCOV_EXCL_START
4123 *eventPP = next;
4124 return XML_ERROR_UNEXPECTED_STATE;
4125 /* LCOV_EXCL_STOP */
4128 *eventPP = s = next;
4129 switch (parser->m_parsingStatus.parsing) {
4130 case XML_SUSPENDED:
4131 *nextPtr = next;
4132 return XML_ERROR_NONE;
4133 case XML_FINISHED:
4134 return XML_ERROR_ABORTED;
4135 default:;
4138 /* not reached */
4141 #ifdef XML_DTD
4143 /* The idea here is to avoid using stack for each IGNORE section when
4144 the whole file is parsed with one call.
4146 static enum XML_Error PTRCALL
4147 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4148 const char **endPtr) {
4149 enum XML_Error result
4150 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4151 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4152 if (result != XML_ERROR_NONE)
4153 return result;
4154 if (start) {
4155 parser->m_processor = prologProcessor;
4156 return prologProcessor(parser, start, end, endPtr);
4158 return result;
4161 /* startPtr gets set to non-null is the section is closed, and to null
4162 if the section is not yet closed.
4164 static enum XML_Error
4165 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4166 const char *end, const char **nextPtr, XML_Bool haveMore) {
4167 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4168 int tok;
4169 const char *s = *startPtr;
4170 const char **eventPP;
4171 const char **eventEndPP;
4172 if (enc == parser->m_encoding) {
4173 eventPP = &parser->m_eventPtr;
4174 *eventPP = s;
4175 eventEndPP = &parser->m_eventEndPtr;
4176 } else {
4177 /* It's not entirely clear, but it seems the following two lines
4178 * of code cannot be executed. The only occasions on which 'enc'
4179 * is not 'encoding' are when this function is called
4180 * from the internal entity processing, and IGNORE sections are an
4181 * error in internal entities.
4183 * Since it really isn't clear that this is true, we keep the code
4184 * and just remove it from our coverage tests.
4186 * LCOV_EXCL_START
4188 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4189 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4190 /* LCOV_EXCL_STOP */
4192 *eventPP = s;
4193 *startPtr = NULL;
4194 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4195 # ifdef XML_DTD
4196 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4197 XML_ACCOUNT_DIRECT)) {
4198 accountingOnAbort(parser);
4199 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4201 # endif
4202 *eventEndPP = next;
4203 switch (tok) {
4204 case XML_TOK_IGNORE_SECT:
4205 if (parser->m_defaultHandler)
4206 reportDefault(parser, enc, s, next);
4207 *startPtr = next;
4208 *nextPtr = next;
4209 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4210 return XML_ERROR_ABORTED;
4211 else
4212 return XML_ERROR_NONE;
4213 case XML_TOK_INVALID:
4214 *eventPP = next;
4215 return XML_ERROR_INVALID_TOKEN;
4216 case XML_TOK_PARTIAL_CHAR:
4217 if (haveMore) {
4218 *nextPtr = s;
4219 return XML_ERROR_NONE;
4221 return XML_ERROR_PARTIAL_CHAR;
4222 case XML_TOK_PARTIAL:
4223 case XML_TOK_NONE:
4224 if (haveMore) {
4225 *nextPtr = s;
4226 return XML_ERROR_NONE;
4228 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4229 default:
4230 /* All of the tokens that XmlIgnoreSectionTok() returns have
4231 * explicit cases to handle them, so this default case is never
4232 * executed. We keep it as a safety net anyway, and remove it
4233 * from our test coverage statistics.
4235 * LCOV_EXCL_START
4237 *eventPP = next;
4238 return XML_ERROR_UNEXPECTED_STATE;
4239 /* LCOV_EXCL_STOP */
4241 /* not reached */
4244 #endif /* XML_DTD */
4246 static enum XML_Error
4247 initializeEncoding(XML_Parser parser) {
4248 const char *s;
4249 #ifdef XML_UNICODE
4250 char encodingBuf[128];
4251 /* See comments about `protocolEncodingName` in parserInit() */
4252 if (! parser->m_protocolEncodingName)
4253 s = NULL;
4254 else {
4255 int i;
4256 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4257 if (i == sizeof(encodingBuf) - 1
4258 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4259 encodingBuf[0] = '\0';
4260 break;
4262 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4264 encodingBuf[i] = '\0';
4265 s = encodingBuf;
4267 #else
4268 s = parser->m_protocolEncodingName;
4269 #endif
4270 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4271 &parser->m_initEncoding, &parser->m_encoding, s))
4272 return XML_ERROR_NONE;
4273 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4276 static enum XML_Error
4277 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4278 const char *next) {
4279 const char *encodingName = NULL;
4280 const XML_Char *storedEncName = NULL;
4281 const ENCODING *newEncoding = NULL;
4282 const char *version = NULL;
4283 const char *versionend = NULL;
4284 const XML_Char *storedversion = NULL;
4285 int standalone = -1;
4287 #ifdef XML_DTD
4288 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4289 XML_ACCOUNT_DIRECT)) {
4290 accountingOnAbort(parser);
4291 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4293 #endif
4295 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4296 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4297 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4298 if (isGeneralTextEntity)
4299 return XML_ERROR_TEXT_DECL;
4300 else
4301 return XML_ERROR_XML_DECL;
4303 if (! isGeneralTextEntity && standalone == 1) {
4304 parser->m_dtd->standalone = XML_TRUE;
4305 #ifdef XML_DTD
4306 if (parser->m_paramEntityParsing
4307 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4308 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4309 #endif /* XML_DTD */
4311 if (parser->m_xmlDeclHandler) {
4312 if (encodingName != NULL) {
4313 storedEncName = poolStoreString(
4314 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4315 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4316 if (! storedEncName)
4317 return XML_ERROR_NO_MEMORY;
4318 poolFinish(&parser->m_temp2Pool);
4320 if (version) {
4321 storedversion
4322 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4323 versionend - parser->m_encoding->minBytesPerChar);
4324 if (! storedversion)
4325 return XML_ERROR_NO_MEMORY;
4327 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4328 standalone);
4329 } else if (parser->m_defaultHandler)
4330 reportDefault(parser, parser->m_encoding, s, next);
4331 if (parser->m_protocolEncodingName == NULL) {
4332 if (newEncoding) {
4333 /* Check that the specified encoding does not conflict with what
4334 * the parser has already deduced. Do we have the same number
4335 * of bytes in the smallest representation of a character? If
4336 * this is UTF-16, is it the same endianness?
4338 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4339 || (newEncoding->minBytesPerChar == 2
4340 && newEncoding != parser->m_encoding)) {
4341 parser->m_eventPtr = encodingName;
4342 return XML_ERROR_INCORRECT_ENCODING;
4344 parser->m_encoding = newEncoding;
4345 } else if (encodingName) {
4346 enum XML_Error result;
4347 if (! storedEncName) {
4348 storedEncName = poolStoreString(
4349 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4350 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4351 if (! storedEncName)
4352 return XML_ERROR_NO_MEMORY;
4354 result = handleUnknownEncoding(parser, storedEncName);
4355 poolClear(&parser->m_temp2Pool);
4356 if (result == XML_ERROR_UNKNOWN_ENCODING)
4357 parser->m_eventPtr = encodingName;
4358 return result;
4362 if (storedEncName || storedversion)
4363 poolClear(&parser->m_temp2Pool);
4365 return XML_ERROR_NONE;
4368 static enum XML_Error
4369 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4370 if (parser->m_unknownEncodingHandler) {
4371 XML_Encoding info;
4372 int i;
4373 for (i = 0; i < 256; i++)
4374 info.map[i] = -1;
4375 info.convert = NULL;
4376 info.data = NULL;
4377 info.release = NULL;
4378 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4379 encodingName, &info)) {
4380 ENCODING *enc;
4381 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4382 if (! parser->m_unknownEncodingMem) {
4383 if (info.release)
4384 info.release(info.data);
4385 return XML_ERROR_NO_MEMORY;
4387 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4388 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4389 if (enc) {
4390 parser->m_unknownEncodingData = info.data;
4391 parser->m_unknownEncodingRelease = info.release;
4392 parser->m_encoding = enc;
4393 return XML_ERROR_NONE;
4396 if (info.release != NULL)
4397 info.release(info.data);
4399 return XML_ERROR_UNKNOWN_ENCODING;
4402 static enum XML_Error PTRCALL
4403 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4404 const char **nextPtr) {
4405 enum XML_Error result = initializeEncoding(parser);
4406 if (result != XML_ERROR_NONE)
4407 return result;
4408 parser->m_processor = prologProcessor;
4409 return prologProcessor(parser, s, end, nextPtr);
4412 #ifdef XML_DTD
4414 static enum XML_Error PTRCALL
4415 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4416 const char **nextPtr) {
4417 enum XML_Error result = initializeEncoding(parser);
4418 if (result != XML_ERROR_NONE)
4419 return result;
4421 /* we know now that XML_Parse(Buffer) has been called,
4422 so we consider the external parameter entity read */
4423 parser->m_dtd->paramEntityRead = XML_TRUE;
4425 if (parser->m_prologState.inEntityValue) {
4426 parser->m_processor = entityValueInitProcessor;
4427 return entityValueInitProcessor(parser, s, end, nextPtr);
4428 } else {
4429 parser->m_processor = externalParEntProcessor;
4430 return externalParEntProcessor(parser, s, end, nextPtr);
4434 static enum XML_Error PTRCALL
4435 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4436 const char **nextPtr) {
4437 int tok;
4438 const char *start = s;
4439 const char *next = start;
4440 parser->m_eventPtr = start;
4442 for (;;) {
4443 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4444 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4445 - storeEntityValue
4446 - processXmlDecl
4448 parser->m_eventEndPtr = next;
4449 if (tok <= 0) {
4450 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4451 *nextPtr = s;
4452 return XML_ERROR_NONE;
4454 switch (tok) {
4455 case XML_TOK_INVALID:
4456 return XML_ERROR_INVALID_TOKEN;
4457 case XML_TOK_PARTIAL:
4458 return XML_ERROR_UNCLOSED_TOKEN;
4459 case XML_TOK_PARTIAL_CHAR:
4460 return XML_ERROR_PARTIAL_CHAR;
4461 case XML_TOK_NONE: /* start == end */
4462 default:
4463 break;
4465 /* found end of entity value - can store it now */
4466 return storeEntityValue(parser, parser->m_encoding, s, end,
4467 XML_ACCOUNT_DIRECT);
4468 } else if (tok == XML_TOK_XML_DECL) {
4469 enum XML_Error result;
4470 result = processXmlDecl(parser, 0, start, next);
4471 if (result != XML_ERROR_NONE)
4472 return result;
4473 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4474 * that to happen, a parameter entity parsing handler must have attempted
4475 * to suspend the parser, which fails and raises an error. The parser can
4476 * be aborted, but can't be suspended.
4478 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4479 return XML_ERROR_ABORTED;
4480 *nextPtr = next;
4481 /* stop scanning for text declaration - we found one */
4482 parser->m_processor = entityValueProcessor;
4483 return entityValueProcessor(parser, next, end, nextPtr);
4485 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4486 return XML_TOK_NONE on the next call, which would then cause the
4487 function to exit with *nextPtr set to s - that is what we want for other
4488 tokens, but not for the BOM - we would rather like to skip it;
4489 then, when this routine is entered the next time, XmlPrologTok will
4490 return XML_TOK_INVALID, since the BOM is still in the buffer
4492 else if (tok == XML_TOK_BOM && next == end
4493 && ! parser->m_parsingStatus.finalBuffer) {
4494 # ifdef XML_DTD
4495 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4496 XML_ACCOUNT_DIRECT)) {
4497 accountingOnAbort(parser);
4498 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4500 # endif
4502 *nextPtr = next;
4503 return XML_ERROR_NONE;
4505 /* If we get this token, we have the start of what might be a
4506 normal tag, but not a declaration (i.e. it doesn't begin with
4507 "<!"). In a DTD context, that isn't legal.
4509 else if (tok == XML_TOK_INSTANCE_START) {
4510 *nextPtr = next;
4511 return XML_ERROR_SYNTAX;
4513 start = next;
4514 parser->m_eventPtr = start;
4518 static enum XML_Error PTRCALL
4519 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4520 const char **nextPtr) {
4521 const char *next = s;
4522 int tok;
4524 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4525 if (tok <= 0) {
4526 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4527 *nextPtr = s;
4528 return XML_ERROR_NONE;
4530 switch (tok) {
4531 case XML_TOK_INVALID:
4532 return XML_ERROR_INVALID_TOKEN;
4533 case XML_TOK_PARTIAL:
4534 return XML_ERROR_UNCLOSED_TOKEN;
4535 case XML_TOK_PARTIAL_CHAR:
4536 return XML_ERROR_PARTIAL_CHAR;
4537 case XML_TOK_NONE: /* start == end */
4538 default:
4539 break;
4542 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4543 However, when parsing an external subset, doProlog will not accept a BOM
4544 as valid, and report a syntax error, so we have to skip the BOM, and
4545 account for the BOM bytes.
4547 else if (tok == XML_TOK_BOM) {
4548 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4549 XML_ACCOUNT_DIRECT)) {
4550 accountingOnAbort(parser);
4551 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4554 s = next;
4555 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4558 parser->m_processor = prologProcessor;
4559 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4560 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4561 XML_ACCOUNT_DIRECT);
4564 static enum XML_Error PTRCALL
4565 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4566 const char **nextPtr) {
4567 const char *start = s;
4568 const char *next = s;
4569 const ENCODING *enc = parser->m_encoding;
4570 int tok;
4572 for (;;) {
4573 tok = XmlPrologTok(enc, start, end, &next);
4574 /* Note: These bytes are accounted later in:
4575 - storeEntityValue
4577 if (tok <= 0) {
4578 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4579 *nextPtr = s;
4580 return XML_ERROR_NONE;
4582 switch (tok) {
4583 case XML_TOK_INVALID:
4584 return XML_ERROR_INVALID_TOKEN;
4585 case XML_TOK_PARTIAL:
4586 return XML_ERROR_UNCLOSED_TOKEN;
4587 case XML_TOK_PARTIAL_CHAR:
4588 return XML_ERROR_PARTIAL_CHAR;
4589 case XML_TOK_NONE: /* start == end */
4590 default:
4591 break;
4593 /* found end of entity value - can store it now */
4594 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4596 start = next;
4600 #endif /* XML_DTD */
4602 static enum XML_Error PTRCALL
4603 prologProcessor(XML_Parser parser, const char *s, const char *end,
4604 const char **nextPtr) {
4605 const char *next = s;
4606 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4607 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4608 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4609 XML_ACCOUNT_DIRECT);
4612 static enum XML_Error
4613 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4614 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4615 XML_Bool allowClosingDoctype, enum XML_Account account) {
4616 #ifdef XML_DTD
4617 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4618 #endif /* XML_DTD */
4619 static const XML_Char atypeCDATA[]
4620 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4621 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4622 static const XML_Char atypeIDREF[]
4623 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4624 static const XML_Char atypeIDREFS[]
4625 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4626 static const XML_Char atypeENTITY[]
4627 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4628 static const XML_Char atypeENTITIES[]
4629 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4630 ASCII_I, ASCII_E, ASCII_S, '\0'};
4631 static const XML_Char atypeNMTOKEN[]
4632 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4633 static const XML_Char atypeNMTOKENS[]
4634 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4635 ASCII_E, ASCII_N, ASCII_S, '\0'};
4636 static const XML_Char notationPrefix[]
4637 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4638 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4639 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4640 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4642 #ifndef XML_DTD
4643 UNUSED_P(account);
4644 #endif
4646 /* save one level of indirection */
4647 DTD *const dtd = parser->m_dtd;
4649 const char **eventPP;
4650 const char **eventEndPP;
4651 enum XML_Content_Quant quant;
4653 if (enc == parser->m_encoding) {
4654 eventPP = &parser->m_eventPtr;
4655 eventEndPP = &parser->m_eventEndPtr;
4656 } else {
4657 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4658 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4661 for (;;) {
4662 int role;
4663 XML_Bool handleDefault = XML_TRUE;
4664 *eventPP = s;
4665 *eventEndPP = next;
4666 if (tok <= 0) {
4667 if (haveMore && tok != XML_TOK_INVALID) {
4668 *nextPtr = s;
4669 return XML_ERROR_NONE;
4671 switch (tok) {
4672 case XML_TOK_INVALID:
4673 *eventPP = next;
4674 return XML_ERROR_INVALID_TOKEN;
4675 case XML_TOK_PARTIAL:
4676 return XML_ERROR_UNCLOSED_TOKEN;
4677 case XML_TOK_PARTIAL_CHAR:
4678 return XML_ERROR_PARTIAL_CHAR;
4679 case -XML_TOK_PROLOG_S:
4680 tok = -tok;
4681 break;
4682 case XML_TOK_NONE:
4683 #ifdef XML_DTD
4684 /* for internal PE NOT referenced between declarations */
4685 if (enc != parser->m_encoding
4686 && ! parser->m_openInternalEntities->betweenDecl) {
4687 *nextPtr = s;
4688 return XML_ERROR_NONE;
4690 /* WFC: PE Between Declarations - must check that PE contains
4691 complete markup, not only for external PEs, but also for
4692 internal PEs if the reference occurs between declarations.
4694 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4695 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4696 == XML_ROLE_ERROR)
4697 return XML_ERROR_INCOMPLETE_PE;
4698 *nextPtr = s;
4699 return XML_ERROR_NONE;
4701 #endif /* XML_DTD */
4702 return XML_ERROR_NO_ELEMENTS;
4703 default:
4704 tok = -tok;
4705 next = end;
4706 break;
4709 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4710 #ifdef XML_DTD
4711 switch (role) {
4712 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4713 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4714 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4715 break;
4716 default:
4717 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4718 accountingOnAbort(parser);
4719 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4722 #endif
4723 switch (role) {
4724 case XML_ROLE_XML_DECL: {
4725 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4726 if (result != XML_ERROR_NONE)
4727 return result;
4728 enc = parser->m_encoding;
4729 handleDefault = XML_FALSE;
4730 } break;
4731 case XML_ROLE_DOCTYPE_NAME:
4732 if (parser->m_startDoctypeDeclHandler) {
4733 parser->m_doctypeName
4734 = poolStoreString(&parser->m_tempPool, enc, s, next);
4735 if (! parser->m_doctypeName)
4736 return XML_ERROR_NO_MEMORY;
4737 poolFinish(&parser->m_tempPool);
4738 parser->m_doctypePubid = NULL;
4739 handleDefault = XML_FALSE;
4741 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4742 break;
4743 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4744 if (parser->m_startDoctypeDeclHandler) {
4745 parser->m_startDoctypeDeclHandler(
4746 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4747 parser->m_doctypePubid, 1);
4748 parser->m_doctypeName = NULL;
4749 poolClear(&parser->m_tempPool);
4750 handleDefault = XML_FALSE;
4752 break;
4753 #ifdef XML_DTD
4754 case XML_ROLE_TEXT_DECL: {
4755 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4756 if (result != XML_ERROR_NONE)
4757 return result;
4758 enc = parser->m_encoding;
4759 handleDefault = XML_FALSE;
4760 } break;
4761 #endif /* XML_DTD */
4762 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4763 #ifdef XML_DTD
4764 parser->m_useForeignDTD = XML_FALSE;
4765 parser->m_declEntity = (ENTITY *)lookup(
4766 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4767 if (! parser->m_declEntity)
4768 return XML_ERROR_NO_MEMORY;
4769 #endif /* XML_DTD */
4770 dtd->hasParamEntityRefs = XML_TRUE;
4771 if (parser->m_startDoctypeDeclHandler) {
4772 XML_Char *pubId;
4773 if (! XmlIsPublicId(enc, s, next, eventPP))
4774 return XML_ERROR_PUBLICID;
4775 pubId = poolStoreString(&parser->m_tempPool, enc,
4776 s + enc->minBytesPerChar,
4777 next - enc->minBytesPerChar);
4778 if (! pubId)
4779 return XML_ERROR_NO_MEMORY;
4780 normalizePublicId(pubId);
4781 poolFinish(&parser->m_tempPool);
4782 parser->m_doctypePubid = pubId;
4783 handleDefault = XML_FALSE;
4784 goto alreadyChecked;
4786 /* fall through */
4787 case XML_ROLE_ENTITY_PUBLIC_ID:
4788 if (! XmlIsPublicId(enc, s, next, eventPP))
4789 return XML_ERROR_PUBLICID;
4790 alreadyChecked:
4791 if (dtd->keepProcessing && parser->m_declEntity) {
4792 XML_Char *tem
4793 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4794 next - enc->minBytesPerChar);
4795 if (! tem)
4796 return XML_ERROR_NO_MEMORY;
4797 normalizePublicId(tem);
4798 parser->m_declEntity->publicId = tem;
4799 poolFinish(&dtd->pool);
4800 /* Don't suppress the default handler if we fell through from
4801 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4803 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4804 handleDefault = XML_FALSE;
4806 break;
4807 case XML_ROLE_DOCTYPE_CLOSE:
4808 if (allowClosingDoctype != XML_TRUE) {
4809 /* Must not close doctype from within expanded parameter entities */
4810 return XML_ERROR_INVALID_TOKEN;
4813 if (parser->m_doctypeName) {
4814 parser->m_startDoctypeDeclHandler(
4815 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4816 parser->m_doctypePubid, 0);
4817 poolClear(&parser->m_tempPool);
4818 handleDefault = XML_FALSE;
4820 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4821 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4822 was not set, indicating an external subset
4824 #ifdef XML_DTD
4825 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4826 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4827 dtd->hasParamEntityRefs = XML_TRUE;
4828 if (parser->m_paramEntityParsing
4829 && parser->m_externalEntityRefHandler) {
4830 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4831 externalSubsetName, sizeof(ENTITY));
4832 if (! entity) {
4833 /* The external subset name "#" will have already been
4834 * inserted into the hash table at the start of the
4835 * external entity parsing, so no allocation will happen
4836 * and lookup() cannot fail.
4838 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4840 if (parser->m_useForeignDTD)
4841 entity->base = parser->m_curBase;
4842 dtd->paramEntityRead = XML_FALSE;
4843 if (! parser->m_externalEntityRefHandler(
4844 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4845 entity->systemId, entity->publicId))
4846 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4847 if (dtd->paramEntityRead) {
4848 if (! dtd->standalone && parser->m_notStandaloneHandler
4849 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4850 return XML_ERROR_NOT_STANDALONE;
4852 /* if we didn't read the foreign DTD then this means that there
4853 is no external subset and we must reset dtd->hasParamEntityRefs
4855 else if (! parser->m_doctypeSysid)
4856 dtd->hasParamEntityRefs = hadParamEntityRefs;
4857 /* end of DTD - no need to update dtd->keepProcessing */
4859 parser->m_useForeignDTD = XML_FALSE;
4861 #endif /* XML_DTD */
4862 if (parser->m_endDoctypeDeclHandler) {
4863 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4864 handleDefault = XML_FALSE;
4866 break;
4867 case XML_ROLE_INSTANCE_START:
4868 #ifdef XML_DTD
4869 /* if there is no DOCTYPE declaration then now is the
4870 last chance to read the foreign DTD
4872 if (parser->m_useForeignDTD) {
4873 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4874 dtd->hasParamEntityRefs = XML_TRUE;
4875 if (parser->m_paramEntityParsing
4876 && parser->m_externalEntityRefHandler) {
4877 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4878 externalSubsetName, sizeof(ENTITY));
4879 if (! entity)
4880 return XML_ERROR_NO_MEMORY;
4881 entity->base = parser->m_curBase;
4882 dtd->paramEntityRead = XML_FALSE;
4883 if (! parser->m_externalEntityRefHandler(
4884 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4885 entity->systemId, entity->publicId))
4886 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4887 if (dtd->paramEntityRead) {
4888 if (! dtd->standalone && parser->m_notStandaloneHandler
4889 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4890 return XML_ERROR_NOT_STANDALONE;
4892 /* if we didn't read the foreign DTD then this means that there
4893 is no external subset and we must reset dtd->hasParamEntityRefs
4895 else
4896 dtd->hasParamEntityRefs = hadParamEntityRefs;
4897 /* end of DTD - no need to update dtd->keepProcessing */
4900 #endif /* XML_DTD */
4901 parser->m_processor = contentProcessor;
4902 return contentProcessor(parser, s, end, nextPtr);
4903 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4904 parser->m_declElementType = getElementType(parser, enc, s, next);
4905 if (! parser->m_declElementType)
4906 return XML_ERROR_NO_MEMORY;
4907 goto checkAttListDeclHandler;
4908 case XML_ROLE_ATTRIBUTE_NAME:
4909 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4910 if (! parser->m_declAttributeId)
4911 return XML_ERROR_NO_MEMORY;
4912 parser->m_declAttributeIsCdata = XML_FALSE;
4913 parser->m_declAttributeType = NULL;
4914 parser->m_declAttributeIsId = XML_FALSE;
4915 goto checkAttListDeclHandler;
4916 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4917 parser->m_declAttributeIsCdata = XML_TRUE;
4918 parser->m_declAttributeType = atypeCDATA;
4919 goto checkAttListDeclHandler;
4920 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4921 parser->m_declAttributeIsId = XML_TRUE;
4922 parser->m_declAttributeType = atypeID;
4923 goto checkAttListDeclHandler;
4924 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4925 parser->m_declAttributeType = atypeIDREF;
4926 goto checkAttListDeclHandler;
4927 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4928 parser->m_declAttributeType = atypeIDREFS;
4929 goto checkAttListDeclHandler;
4930 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4931 parser->m_declAttributeType = atypeENTITY;
4932 goto checkAttListDeclHandler;
4933 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4934 parser->m_declAttributeType = atypeENTITIES;
4935 goto checkAttListDeclHandler;
4936 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4937 parser->m_declAttributeType = atypeNMTOKEN;
4938 goto checkAttListDeclHandler;
4939 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4940 parser->m_declAttributeType = atypeNMTOKENS;
4941 checkAttListDeclHandler:
4942 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4943 handleDefault = XML_FALSE;
4944 break;
4945 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4946 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4947 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4948 const XML_Char *prefix;
4949 if (parser->m_declAttributeType) {
4950 prefix = enumValueSep;
4951 } else {
4952 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4953 : enumValueStart);
4955 if (! poolAppendString(&parser->m_tempPool, prefix))
4956 return XML_ERROR_NO_MEMORY;
4957 if (! poolAppend(&parser->m_tempPool, enc, s, next))
4958 return XML_ERROR_NO_MEMORY;
4959 parser->m_declAttributeType = parser->m_tempPool.start;
4960 handleDefault = XML_FALSE;
4962 break;
4963 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4964 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4965 if (dtd->keepProcessing) {
4966 if (! defineAttribute(parser->m_declElementType,
4967 parser->m_declAttributeId,
4968 parser->m_declAttributeIsCdata,
4969 parser->m_declAttributeIsId, 0, parser))
4970 return XML_ERROR_NO_MEMORY;
4971 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4972 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4973 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4974 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4975 /* Enumerated or Notation type */
4976 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4977 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4978 return XML_ERROR_NO_MEMORY;
4979 parser->m_declAttributeType = parser->m_tempPool.start;
4980 poolFinish(&parser->m_tempPool);
4982 *eventEndPP = s;
4983 parser->m_attlistDeclHandler(
4984 parser->m_handlerArg, parser->m_declElementType->name,
4985 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4986 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4987 handleDefault = XML_FALSE;
4990 poolClear(&parser->m_tempPool);
4991 break;
4992 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4993 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4994 if (dtd->keepProcessing) {
4995 const XML_Char *attVal;
4996 enum XML_Error result = storeAttributeValue(
4997 parser, enc, parser->m_declAttributeIsCdata,
4998 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
4999 XML_ACCOUNT_NONE);
5000 if (result)
5001 return result;
5002 attVal = poolStart(&dtd->pool);
5003 poolFinish(&dtd->pool);
5004 /* ID attributes aren't allowed to have a default */
5005 if (! defineAttribute(
5006 parser->m_declElementType, parser->m_declAttributeId,
5007 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5008 return XML_ERROR_NO_MEMORY;
5009 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5010 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5011 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5012 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5013 /* Enumerated or Notation type */
5014 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5015 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5016 return XML_ERROR_NO_MEMORY;
5017 parser->m_declAttributeType = parser->m_tempPool.start;
5018 poolFinish(&parser->m_tempPool);
5020 *eventEndPP = s;
5021 parser->m_attlistDeclHandler(
5022 parser->m_handlerArg, parser->m_declElementType->name,
5023 parser->m_declAttributeId->name, parser->m_declAttributeType,
5024 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5025 poolClear(&parser->m_tempPool);
5026 handleDefault = XML_FALSE;
5029 break;
5030 case XML_ROLE_ENTITY_VALUE:
5031 if (dtd->keepProcessing) {
5032 enum XML_Error result
5033 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
5034 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
5035 if (parser->m_declEntity) {
5036 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5037 parser->m_declEntity->textLen
5038 = (int)(poolLength(&dtd->entityValuePool));
5039 poolFinish(&dtd->entityValuePool);
5040 if (parser->m_entityDeclHandler) {
5041 *eventEndPP = s;
5042 parser->m_entityDeclHandler(
5043 parser->m_handlerArg, parser->m_declEntity->name,
5044 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5045 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5046 handleDefault = XML_FALSE;
5048 } else
5049 poolDiscard(&dtd->entityValuePool);
5050 if (result != XML_ERROR_NONE)
5051 return result;
5053 break;
5054 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5055 #ifdef XML_DTD
5056 parser->m_useForeignDTD = XML_FALSE;
5057 #endif /* XML_DTD */
5058 dtd->hasParamEntityRefs = XML_TRUE;
5059 if (parser->m_startDoctypeDeclHandler) {
5060 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5061 s + enc->minBytesPerChar,
5062 next - enc->minBytesPerChar);
5063 if (parser->m_doctypeSysid == NULL)
5064 return XML_ERROR_NO_MEMORY;
5065 poolFinish(&parser->m_tempPool);
5066 handleDefault = XML_FALSE;
5068 #ifdef XML_DTD
5069 else
5070 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5071 for the case where no parser->m_startDoctypeDeclHandler is set */
5072 parser->m_doctypeSysid = externalSubsetName;
5073 #endif /* XML_DTD */
5074 if (! dtd->standalone
5075 #ifdef XML_DTD
5076 && ! parser->m_paramEntityParsing
5077 #endif /* XML_DTD */
5078 && parser->m_notStandaloneHandler
5079 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5080 return XML_ERROR_NOT_STANDALONE;
5081 #ifndef XML_DTD
5082 break;
5083 #else /* XML_DTD */
5084 if (! parser->m_declEntity) {
5085 parser->m_declEntity = (ENTITY *)lookup(
5086 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5087 if (! parser->m_declEntity)
5088 return XML_ERROR_NO_MEMORY;
5089 parser->m_declEntity->publicId = NULL;
5091 #endif /* XML_DTD */
5092 /* fall through */
5093 case XML_ROLE_ENTITY_SYSTEM_ID:
5094 if (dtd->keepProcessing && parser->m_declEntity) {
5095 parser->m_declEntity->systemId
5096 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5097 next - enc->minBytesPerChar);
5098 if (! parser->m_declEntity->systemId)
5099 return XML_ERROR_NO_MEMORY;
5100 parser->m_declEntity->base = parser->m_curBase;
5101 poolFinish(&dtd->pool);
5102 /* Don't suppress the default handler if we fell through from
5103 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5105 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5106 handleDefault = XML_FALSE;
5108 break;
5109 case XML_ROLE_ENTITY_COMPLETE:
5110 if (dtd->keepProcessing && parser->m_declEntity
5111 && parser->m_entityDeclHandler) {
5112 *eventEndPP = s;
5113 parser->m_entityDeclHandler(
5114 parser->m_handlerArg, parser->m_declEntity->name,
5115 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5116 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5117 handleDefault = XML_FALSE;
5119 break;
5120 case XML_ROLE_ENTITY_NOTATION_NAME:
5121 if (dtd->keepProcessing && parser->m_declEntity) {
5122 parser->m_declEntity->notation
5123 = poolStoreString(&dtd->pool, enc, s, next);
5124 if (! parser->m_declEntity->notation)
5125 return XML_ERROR_NO_MEMORY;
5126 poolFinish(&dtd->pool);
5127 if (parser->m_unparsedEntityDeclHandler) {
5128 *eventEndPP = s;
5129 parser->m_unparsedEntityDeclHandler(
5130 parser->m_handlerArg, parser->m_declEntity->name,
5131 parser->m_declEntity->base, parser->m_declEntity->systemId,
5132 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5133 handleDefault = XML_FALSE;
5134 } else if (parser->m_entityDeclHandler) {
5135 *eventEndPP = s;
5136 parser->m_entityDeclHandler(
5137 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5138 parser->m_declEntity->base, parser->m_declEntity->systemId,
5139 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5140 handleDefault = XML_FALSE;
5143 break;
5144 case XML_ROLE_GENERAL_ENTITY_NAME: {
5145 if (XmlPredefinedEntityName(enc, s, next)) {
5146 parser->m_declEntity = NULL;
5147 break;
5149 if (dtd->keepProcessing) {
5150 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5151 if (! name)
5152 return XML_ERROR_NO_MEMORY;
5153 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5154 name, sizeof(ENTITY));
5155 if (! parser->m_declEntity)
5156 return XML_ERROR_NO_MEMORY;
5157 if (parser->m_declEntity->name != name) {
5158 poolDiscard(&dtd->pool);
5159 parser->m_declEntity = NULL;
5160 } else {
5161 poolFinish(&dtd->pool);
5162 parser->m_declEntity->publicId = NULL;
5163 parser->m_declEntity->is_param = XML_FALSE;
5164 /* if we have a parent parser or are reading an internal parameter
5165 entity, then the entity declaration is not considered "internal"
5167 parser->m_declEntity->is_internal
5168 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5169 if (parser->m_entityDeclHandler)
5170 handleDefault = XML_FALSE;
5172 } else {
5173 poolDiscard(&dtd->pool);
5174 parser->m_declEntity = NULL;
5176 } break;
5177 case XML_ROLE_PARAM_ENTITY_NAME:
5178 #ifdef XML_DTD
5179 if (dtd->keepProcessing) {
5180 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5181 if (! name)
5182 return XML_ERROR_NO_MEMORY;
5183 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5184 name, sizeof(ENTITY));
5185 if (! parser->m_declEntity)
5186 return XML_ERROR_NO_MEMORY;
5187 if (parser->m_declEntity->name != name) {
5188 poolDiscard(&dtd->pool);
5189 parser->m_declEntity = NULL;
5190 } else {
5191 poolFinish(&dtd->pool);
5192 parser->m_declEntity->publicId = NULL;
5193 parser->m_declEntity->is_param = XML_TRUE;
5194 /* if we have a parent parser or are reading an internal parameter
5195 entity, then the entity declaration is not considered "internal"
5197 parser->m_declEntity->is_internal
5198 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5199 if (parser->m_entityDeclHandler)
5200 handleDefault = XML_FALSE;
5202 } else {
5203 poolDiscard(&dtd->pool);
5204 parser->m_declEntity = NULL;
5206 #else /* not XML_DTD */
5207 parser->m_declEntity = NULL;
5208 #endif /* XML_DTD */
5209 break;
5210 case XML_ROLE_NOTATION_NAME:
5211 parser->m_declNotationPublicId = NULL;
5212 parser->m_declNotationName = NULL;
5213 if (parser->m_notationDeclHandler) {
5214 parser->m_declNotationName
5215 = poolStoreString(&parser->m_tempPool, enc, s, next);
5216 if (! parser->m_declNotationName)
5217 return XML_ERROR_NO_MEMORY;
5218 poolFinish(&parser->m_tempPool);
5219 handleDefault = XML_FALSE;
5221 break;
5222 case XML_ROLE_NOTATION_PUBLIC_ID:
5223 if (! XmlIsPublicId(enc, s, next, eventPP))
5224 return XML_ERROR_PUBLICID;
5225 if (parser
5226 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5227 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5228 s + enc->minBytesPerChar,
5229 next - enc->minBytesPerChar);
5230 if (! tem)
5231 return XML_ERROR_NO_MEMORY;
5232 normalizePublicId(tem);
5233 parser->m_declNotationPublicId = tem;
5234 poolFinish(&parser->m_tempPool);
5235 handleDefault = XML_FALSE;
5237 break;
5238 case XML_ROLE_NOTATION_SYSTEM_ID:
5239 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5240 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5241 s + enc->minBytesPerChar,
5242 next - enc->minBytesPerChar);
5243 if (! systemId)
5244 return XML_ERROR_NO_MEMORY;
5245 *eventEndPP = s;
5246 parser->m_notationDeclHandler(
5247 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5248 systemId, parser->m_declNotationPublicId);
5249 handleDefault = XML_FALSE;
5251 poolClear(&parser->m_tempPool);
5252 break;
5253 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5254 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5255 *eventEndPP = s;
5256 parser->m_notationDeclHandler(
5257 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5258 0, parser->m_declNotationPublicId);
5259 handleDefault = XML_FALSE;
5261 poolClear(&parser->m_tempPool);
5262 break;
5263 case XML_ROLE_ERROR:
5264 switch (tok) {
5265 case XML_TOK_PARAM_ENTITY_REF:
5266 /* PE references in internal subset are
5267 not allowed within declarations. */
5268 return XML_ERROR_PARAM_ENTITY_REF;
5269 case XML_TOK_XML_DECL:
5270 return XML_ERROR_MISPLACED_XML_PI;
5271 default:
5272 return XML_ERROR_SYNTAX;
5274 #ifdef XML_DTD
5275 case XML_ROLE_IGNORE_SECT: {
5276 enum XML_Error result;
5277 if (parser->m_defaultHandler)
5278 reportDefault(parser, enc, s, next);
5279 handleDefault = XML_FALSE;
5280 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5281 if (result != XML_ERROR_NONE)
5282 return result;
5283 else if (! next) {
5284 parser->m_processor = ignoreSectionProcessor;
5285 return result;
5287 } break;
5288 #endif /* XML_DTD */
5289 case XML_ROLE_GROUP_OPEN:
5290 if (parser->m_prologState.level >= parser->m_groupSize) {
5291 if (parser->m_groupSize) {
5293 /* Detect and prevent integer overflow */
5294 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5295 return XML_ERROR_NO_MEMORY;
5298 char *const new_connector = (char *)REALLOC(
5299 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5300 if (new_connector == NULL) {
5301 parser->m_groupSize /= 2;
5302 return XML_ERROR_NO_MEMORY;
5304 parser->m_groupConnector = new_connector;
5307 if (dtd->scaffIndex) {
5308 /* Detect and prevent integer overflow.
5309 * The preprocessor guard addresses the "always false" warning
5310 * from -Wtype-limits on platforms where
5311 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5312 #if UINT_MAX >= SIZE_MAX
5313 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5314 return XML_ERROR_NO_MEMORY;
5316 #endif
5318 int *const new_scaff_index = (int *)REALLOC(
5319 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5320 if (new_scaff_index == NULL)
5321 return XML_ERROR_NO_MEMORY;
5322 dtd->scaffIndex = new_scaff_index;
5324 } else {
5325 parser->m_groupConnector
5326 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5327 if (! parser->m_groupConnector) {
5328 parser->m_groupSize = 0;
5329 return XML_ERROR_NO_MEMORY;
5333 parser->m_groupConnector[parser->m_prologState.level] = 0;
5334 if (dtd->in_eldecl) {
5335 int myindex = nextScaffoldPart(parser);
5336 if (myindex < 0)
5337 return XML_ERROR_NO_MEMORY;
5338 assert(dtd->scaffIndex != NULL);
5339 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5340 dtd->scaffLevel++;
5341 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5342 if (parser->m_elementDeclHandler)
5343 handleDefault = XML_FALSE;
5345 break;
5346 case XML_ROLE_GROUP_SEQUENCE:
5347 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5348 return XML_ERROR_SYNTAX;
5349 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5350 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5351 handleDefault = XML_FALSE;
5352 break;
5353 case XML_ROLE_GROUP_CHOICE:
5354 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5355 return XML_ERROR_SYNTAX;
5356 if (dtd->in_eldecl
5357 && ! parser->m_groupConnector[parser->m_prologState.level]
5358 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5359 != XML_CTYPE_MIXED)) {
5360 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5361 = XML_CTYPE_CHOICE;
5362 if (parser->m_elementDeclHandler)
5363 handleDefault = XML_FALSE;
5365 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5366 break;
5367 case XML_ROLE_PARAM_ENTITY_REF:
5368 #ifdef XML_DTD
5369 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5370 dtd->hasParamEntityRefs = XML_TRUE;
5371 if (! parser->m_paramEntityParsing)
5372 dtd->keepProcessing = dtd->standalone;
5373 else {
5374 const XML_Char *name;
5375 ENTITY *entity;
5376 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5377 next - enc->minBytesPerChar);
5378 if (! name)
5379 return XML_ERROR_NO_MEMORY;
5380 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5381 poolDiscard(&dtd->pool);
5382 /* first, determine if a check for an existing declaration is needed;
5383 if yes, check that the entity exists, and that it is internal,
5384 otherwise call the skipped entity handler
5386 if (parser->m_prologState.documentEntity
5387 && (dtd->standalone ? ! parser->m_openInternalEntities
5388 : ! dtd->hasParamEntityRefs)) {
5389 if (! entity)
5390 return XML_ERROR_UNDEFINED_ENTITY;
5391 else if (! entity->is_internal) {
5392 /* It's hard to exhaustively search the code to be sure,
5393 * but there doesn't seem to be a way of executing the
5394 * following line. There are two cases:
5396 * If 'standalone' is false, the DTD must have no
5397 * parameter entities or we wouldn't have passed the outer
5398 * 'if' statement. That means the only entity in the hash
5399 * table is the external subset name "#" which cannot be
5400 * given as a parameter entity name in XML syntax, so the
5401 * lookup must have returned NULL and we don't even reach
5402 * the test for an internal entity.
5404 * If 'standalone' is true, it does not seem to be
5405 * possible to create entities taking this code path that
5406 * are not internal entities, so fail the test above.
5408 * Because this analysis is very uncertain, the code is
5409 * being left in place and merely removed from the
5410 * coverage test statistics.
5412 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5414 } else if (! entity) {
5415 dtd->keepProcessing = dtd->standalone;
5416 /* cannot report skipped entities in declarations */
5417 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5418 && parser->m_skippedEntityHandler) {
5419 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5420 handleDefault = XML_FALSE;
5422 break;
5424 if (entity->open)
5425 return XML_ERROR_RECURSIVE_ENTITY_REF;
5426 if (entity->textPtr) {
5427 enum XML_Error result;
5428 XML_Bool betweenDecl
5429 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5430 result = processInternalEntity(parser, entity, betweenDecl);
5431 if (result != XML_ERROR_NONE)
5432 return result;
5433 handleDefault = XML_FALSE;
5434 break;
5436 if (parser->m_externalEntityRefHandler) {
5437 dtd->paramEntityRead = XML_FALSE;
5438 entity->open = XML_TRUE;
5439 entityTrackingOnOpen(parser, entity, __LINE__);
5440 if (! parser->m_externalEntityRefHandler(
5441 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5442 entity->systemId, entity->publicId)) {
5443 entityTrackingOnClose(parser, entity, __LINE__);
5444 entity->open = XML_FALSE;
5445 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5447 entityTrackingOnClose(parser, entity, __LINE__);
5448 entity->open = XML_FALSE;
5449 handleDefault = XML_FALSE;
5450 if (! dtd->paramEntityRead) {
5451 dtd->keepProcessing = dtd->standalone;
5452 break;
5454 } else {
5455 dtd->keepProcessing = dtd->standalone;
5456 break;
5459 #endif /* XML_DTD */
5460 if (! dtd->standalone && parser->m_notStandaloneHandler
5461 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5462 return XML_ERROR_NOT_STANDALONE;
5463 break;
5465 /* Element declaration stuff */
5467 case XML_ROLE_ELEMENT_NAME:
5468 if (parser->m_elementDeclHandler) {
5469 parser->m_declElementType = getElementType(parser, enc, s, next);
5470 if (! parser->m_declElementType)
5471 return XML_ERROR_NO_MEMORY;
5472 dtd->scaffLevel = 0;
5473 dtd->scaffCount = 0;
5474 dtd->in_eldecl = XML_TRUE;
5475 handleDefault = XML_FALSE;
5477 break;
5479 case XML_ROLE_CONTENT_ANY:
5480 case XML_ROLE_CONTENT_EMPTY:
5481 if (dtd->in_eldecl) {
5482 if (parser->m_elementDeclHandler) {
5483 XML_Content *content
5484 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5485 if (! content)
5486 return XML_ERROR_NO_MEMORY;
5487 content->quant = XML_CQUANT_NONE;
5488 content->name = NULL;
5489 content->numchildren = 0;
5490 content->children = NULL;
5491 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5492 : XML_CTYPE_EMPTY);
5493 *eventEndPP = s;
5494 parser->m_elementDeclHandler(
5495 parser->m_handlerArg, parser->m_declElementType->name, content);
5496 handleDefault = XML_FALSE;
5498 dtd->in_eldecl = XML_FALSE;
5500 break;
5502 case XML_ROLE_CONTENT_PCDATA:
5503 if (dtd->in_eldecl) {
5504 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5505 = XML_CTYPE_MIXED;
5506 if (parser->m_elementDeclHandler)
5507 handleDefault = XML_FALSE;
5509 break;
5511 case XML_ROLE_CONTENT_ELEMENT:
5512 quant = XML_CQUANT_NONE;
5513 goto elementContent;
5514 case XML_ROLE_CONTENT_ELEMENT_OPT:
5515 quant = XML_CQUANT_OPT;
5516 goto elementContent;
5517 case XML_ROLE_CONTENT_ELEMENT_REP:
5518 quant = XML_CQUANT_REP;
5519 goto elementContent;
5520 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5521 quant = XML_CQUANT_PLUS;
5522 elementContent:
5523 if (dtd->in_eldecl) {
5524 ELEMENT_TYPE *el;
5525 const XML_Char *name;
5526 size_t nameLen;
5527 const char *nxt
5528 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5529 int myindex = nextScaffoldPart(parser);
5530 if (myindex < 0)
5531 return XML_ERROR_NO_MEMORY;
5532 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5533 dtd->scaffold[myindex].quant = quant;
5534 el = getElementType(parser, enc, s, nxt);
5535 if (! el)
5536 return XML_ERROR_NO_MEMORY;
5537 name = el->name;
5538 dtd->scaffold[myindex].name = name;
5539 nameLen = 0;
5540 for (; name[nameLen++];)
5543 /* Detect and prevent integer overflow */
5544 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5545 return XML_ERROR_NO_MEMORY;
5548 dtd->contentStringLen += (unsigned)nameLen;
5549 if (parser->m_elementDeclHandler)
5550 handleDefault = XML_FALSE;
5552 break;
5554 case XML_ROLE_GROUP_CLOSE:
5555 quant = XML_CQUANT_NONE;
5556 goto closeGroup;
5557 case XML_ROLE_GROUP_CLOSE_OPT:
5558 quant = XML_CQUANT_OPT;
5559 goto closeGroup;
5560 case XML_ROLE_GROUP_CLOSE_REP:
5561 quant = XML_CQUANT_REP;
5562 goto closeGroup;
5563 case XML_ROLE_GROUP_CLOSE_PLUS:
5564 quant = XML_CQUANT_PLUS;
5565 closeGroup:
5566 if (dtd->in_eldecl) {
5567 if (parser->m_elementDeclHandler)
5568 handleDefault = XML_FALSE;
5569 dtd->scaffLevel--;
5570 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5571 if (dtd->scaffLevel == 0) {
5572 if (! handleDefault) {
5573 XML_Content *model = build_model(parser);
5574 if (! model)
5575 return XML_ERROR_NO_MEMORY;
5576 *eventEndPP = s;
5577 parser->m_elementDeclHandler(
5578 parser->m_handlerArg, parser->m_declElementType->name, model);
5580 dtd->in_eldecl = XML_FALSE;
5581 dtd->contentStringLen = 0;
5584 break;
5585 /* End element declaration stuff */
5587 case XML_ROLE_PI:
5588 if (! reportProcessingInstruction(parser, enc, s, next))
5589 return XML_ERROR_NO_MEMORY;
5590 handleDefault = XML_FALSE;
5591 break;
5592 case XML_ROLE_COMMENT:
5593 if (! reportComment(parser, enc, s, next))
5594 return XML_ERROR_NO_MEMORY;
5595 handleDefault = XML_FALSE;
5596 break;
5597 case XML_ROLE_NONE:
5598 switch (tok) {
5599 case XML_TOK_BOM:
5600 handleDefault = XML_FALSE;
5601 break;
5603 break;
5604 case XML_ROLE_DOCTYPE_NONE:
5605 if (parser->m_startDoctypeDeclHandler)
5606 handleDefault = XML_FALSE;
5607 break;
5608 case XML_ROLE_ENTITY_NONE:
5609 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5610 handleDefault = XML_FALSE;
5611 break;
5612 case XML_ROLE_NOTATION_NONE:
5613 if (parser->m_notationDeclHandler)
5614 handleDefault = XML_FALSE;
5615 break;
5616 case XML_ROLE_ATTLIST_NONE:
5617 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5618 handleDefault = XML_FALSE;
5619 break;
5620 case XML_ROLE_ELEMENT_NONE:
5621 if (parser->m_elementDeclHandler)
5622 handleDefault = XML_FALSE;
5623 break;
5624 } /* end of big switch */
5626 if (handleDefault && parser->m_defaultHandler)
5627 reportDefault(parser, enc, s, next);
5629 switch (parser->m_parsingStatus.parsing) {
5630 case XML_SUSPENDED:
5631 *nextPtr = next;
5632 return XML_ERROR_NONE;
5633 case XML_FINISHED:
5634 return XML_ERROR_ABORTED;
5635 default:
5636 s = next;
5637 tok = XmlPrologTok(enc, s, end, &next);
5640 /* not reached */
5643 static enum XML_Error PTRCALL
5644 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5645 const char **nextPtr) {
5646 parser->m_processor = epilogProcessor;
5647 parser->m_eventPtr = s;
5648 for (;;) {
5649 const char *next = NULL;
5650 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5651 #ifdef XML_DTD
5652 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5653 XML_ACCOUNT_DIRECT)) {
5654 accountingOnAbort(parser);
5655 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5657 #endif
5658 parser->m_eventEndPtr = next;
5659 switch (tok) {
5660 /* report partial linebreak - it might be the last token */
5661 case -XML_TOK_PROLOG_S:
5662 if (parser->m_defaultHandler) {
5663 reportDefault(parser, parser->m_encoding, s, next);
5664 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5665 return XML_ERROR_ABORTED;
5667 *nextPtr = next;
5668 return XML_ERROR_NONE;
5669 case XML_TOK_NONE:
5670 *nextPtr = s;
5671 return XML_ERROR_NONE;
5672 case XML_TOK_PROLOG_S:
5673 if (parser->m_defaultHandler)
5674 reportDefault(parser, parser->m_encoding, s, next);
5675 break;
5676 case XML_TOK_PI:
5677 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5678 return XML_ERROR_NO_MEMORY;
5679 break;
5680 case XML_TOK_COMMENT:
5681 if (! reportComment(parser, parser->m_encoding, s, next))
5682 return XML_ERROR_NO_MEMORY;
5683 break;
5684 case XML_TOK_INVALID:
5685 parser->m_eventPtr = next;
5686 return XML_ERROR_INVALID_TOKEN;
5687 case XML_TOK_PARTIAL:
5688 if (! parser->m_parsingStatus.finalBuffer) {
5689 *nextPtr = s;
5690 return XML_ERROR_NONE;
5692 return XML_ERROR_UNCLOSED_TOKEN;
5693 case XML_TOK_PARTIAL_CHAR:
5694 if (! parser->m_parsingStatus.finalBuffer) {
5695 *nextPtr = s;
5696 return XML_ERROR_NONE;
5698 return XML_ERROR_PARTIAL_CHAR;
5699 default:
5700 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5702 parser->m_eventPtr = s = next;
5703 switch (parser->m_parsingStatus.parsing) {
5704 case XML_SUSPENDED:
5705 *nextPtr = next;
5706 return XML_ERROR_NONE;
5707 case XML_FINISHED:
5708 return XML_ERROR_ABORTED;
5709 default:;
5714 static enum XML_Error
5715 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5716 const char *textStart, *textEnd;
5717 const char *next;
5718 enum XML_Error result;
5719 OPEN_INTERNAL_ENTITY *openEntity;
5721 if (parser->m_freeInternalEntities) {
5722 openEntity = parser->m_freeInternalEntities;
5723 parser->m_freeInternalEntities = openEntity->next;
5724 } else {
5725 openEntity
5726 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5727 if (! openEntity)
5728 return XML_ERROR_NO_MEMORY;
5730 entity->open = XML_TRUE;
5731 #ifdef XML_DTD
5732 entityTrackingOnOpen(parser, entity, __LINE__);
5733 #endif
5734 entity->processed = 0;
5735 openEntity->next = parser->m_openInternalEntities;
5736 parser->m_openInternalEntities = openEntity;
5737 openEntity->entity = entity;
5738 openEntity->startTagLevel = parser->m_tagLevel;
5739 openEntity->betweenDecl = betweenDecl;
5740 openEntity->internalEventPtr = NULL;
5741 openEntity->internalEventEndPtr = NULL;
5742 textStart = (const char *)entity->textPtr;
5743 textEnd = (const char *)(entity->textPtr + entity->textLen);
5744 /* Set a safe default value in case 'next' does not get set */
5745 next = textStart;
5747 #ifdef XML_DTD
5748 if (entity->is_param) {
5749 int tok
5750 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5751 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5752 tok, next, &next, XML_FALSE, XML_FALSE,
5753 XML_ACCOUNT_ENTITY_EXPANSION);
5754 } else
5755 #endif /* XML_DTD */
5756 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5757 textStart, textEnd, &next, XML_FALSE,
5758 XML_ACCOUNT_ENTITY_EXPANSION);
5760 if (result == XML_ERROR_NONE) {
5761 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5762 entity->processed = (int)(next - textStart);
5763 parser->m_processor = internalEntityProcessor;
5764 } else {
5765 #ifdef XML_DTD
5766 entityTrackingOnClose(parser, entity, __LINE__);
5767 #endif /* XML_DTD */
5768 entity->open = XML_FALSE;
5769 parser->m_openInternalEntities = openEntity->next;
5770 /* put openEntity back in list of free instances */
5771 openEntity->next = parser->m_freeInternalEntities;
5772 parser->m_freeInternalEntities = openEntity;
5775 return result;
5778 static enum XML_Error PTRCALL
5779 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5780 const char **nextPtr) {
5781 ENTITY *entity;
5782 const char *textStart, *textEnd;
5783 const char *next;
5784 enum XML_Error result;
5785 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5786 if (! openEntity)
5787 return XML_ERROR_UNEXPECTED_STATE;
5789 entity = openEntity->entity;
5790 textStart = ((const char *)entity->textPtr) + entity->processed;
5791 textEnd = (const char *)(entity->textPtr + entity->textLen);
5792 /* Set a safe default value in case 'next' does not get set */
5793 next = textStart;
5795 #ifdef XML_DTD
5796 if (entity->is_param) {
5797 int tok
5798 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5799 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5800 tok, next, &next, XML_FALSE, XML_TRUE,
5801 XML_ACCOUNT_ENTITY_EXPANSION);
5802 } else
5803 #endif /* XML_DTD */
5804 result = doContent(parser, openEntity->startTagLevel,
5805 parser->m_internalEncoding, textStart, textEnd, &next,
5806 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5808 if (result != XML_ERROR_NONE)
5809 return result;
5811 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5812 entity->processed = (int)(next - (const char *)entity->textPtr);
5813 return result;
5816 #ifdef XML_DTD
5817 entityTrackingOnClose(parser, entity, __LINE__);
5818 #endif
5819 entity->open = XML_FALSE;
5820 parser->m_openInternalEntities = openEntity->next;
5821 /* put openEntity back in list of free instances */
5822 openEntity->next = parser->m_freeInternalEntities;
5823 parser->m_freeInternalEntities = openEntity;
5825 // If there are more open entities we want to stop right here and have the
5826 // upcoming call to XML_ResumeParser continue with entity content, or it would
5827 // be ignored altogether.
5828 if (parser->m_openInternalEntities != NULL
5829 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5830 return XML_ERROR_NONE;
5833 #ifdef XML_DTD
5834 if (entity->is_param) {
5835 int tok;
5836 parser->m_processor = prologProcessor;
5837 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5838 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5839 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5840 XML_ACCOUNT_DIRECT);
5841 } else
5842 #endif /* XML_DTD */
5844 parser->m_processor = contentProcessor;
5845 /* see externalEntityContentProcessor vs contentProcessor */
5846 result = doContent(parser, parser->m_parentParser ? 1 : 0,
5847 parser->m_encoding, s, end, nextPtr,
5848 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5849 XML_ACCOUNT_DIRECT);
5850 if (result == XML_ERROR_NONE) {
5851 if (! storeRawNames(parser))
5852 return XML_ERROR_NO_MEMORY;
5854 return result;
5858 static enum XML_Error PTRCALL
5859 errorProcessor(XML_Parser parser, const char *s, const char *end,
5860 const char **nextPtr) {
5861 UNUSED_P(s);
5862 UNUSED_P(end);
5863 UNUSED_P(nextPtr);
5864 return parser->m_errorCode;
5867 static enum XML_Error
5868 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5869 const char *ptr, const char *end, STRING_POOL *pool,
5870 enum XML_Account account) {
5871 enum XML_Error result
5872 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5873 if (result)
5874 return result;
5875 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5876 poolChop(pool);
5877 if (! poolAppendChar(pool, XML_T('\0')))
5878 return XML_ERROR_NO_MEMORY;
5879 return XML_ERROR_NONE;
5882 static enum XML_Error
5883 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5884 const char *ptr, const char *end, STRING_POOL *pool,
5885 enum XML_Account account) {
5886 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5887 #ifndef XML_DTD
5888 UNUSED_P(account);
5889 #endif
5891 for (;;) {
5892 const char *next
5893 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5894 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5895 #ifdef XML_DTD
5896 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5897 accountingOnAbort(parser);
5898 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5900 #endif
5901 switch (tok) {
5902 case XML_TOK_NONE:
5903 return XML_ERROR_NONE;
5904 case XML_TOK_INVALID:
5905 if (enc == parser->m_encoding)
5906 parser->m_eventPtr = next;
5907 return XML_ERROR_INVALID_TOKEN;
5908 case XML_TOK_PARTIAL:
5909 if (enc == parser->m_encoding)
5910 parser->m_eventPtr = ptr;
5911 return XML_ERROR_INVALID_TOKEN;
5912 case XML_TOK_CHAR_REF: {
5913 XML_Char buf[XML_ENCODE_MAX];
5914 int i;
5915 int n = XmlCharRefNumber(enc, ptr);
5916 if (n < 0) {
5917 if (enc == parser->m_encoding)
5918 parser->m_eventPtr = ptr;
5919 return XML_ERROR_BAD_CHAR_REF;
5921 if (! isCdata && n == 0x20 /* space */
5922 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5923 break;
5924 n = XmlEncode(n, (ICHAR *)buf);
5925 /* The XmlEncode() functions can never return 0 here. That
5926 * error return happens if the code point passed in is either
5927 * negative or greater than or equal to 0x110000. The
5928 * XmlCharRefNumber() functions will all return a number
5929 * strictly less than 0x110000 or a negative value if an error
5930 * occurred. The negative value is intercepted above, so
5931 * XmlEncode() is never passed a value it might return an
5932 * error for.
5934 for (i = 0; i < n; i++) {
5935 if (! poolAppendChar(pool, buf[i]))
5936 return XML_ERROR_NO_MEMORY;
5938 } break;
5939 case XML_TOK_DATA_CHARS:
5940 if (! poolAppend(pool, enc, ptr, next))
5941 return XML_ERROR_NO_MEMORY;
5942 break;
5943 case XML_TOK_TRAILING_CR:
5944 next = ptr + enc->minBytesPerChar;
5945 /* fall through */
5946 case XML_TOK_ATTRIBUTE_VALUE_S:
5947 case XML_TOK_DATA_NEWLINE:
5948 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5949 break;
5950 if (! poolAppendChar(pool, 0x20))
5951 return XML_ERROR_NO_MEMORY;
5952 break;
5953 case XML_TOK_ENTITY_REF: {
5954 const XML_Char *name;
5955 ENTITY *entity;
5956 char checkEntityDecl;
5957 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5958 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5959 if (ch) {
5960 #ifdef XML_DTD
5961 /* NOTE: We are replacing 4-6 characters original input for 1 character
5962 * so there is no amplification and hence recording without
5963 * protection. */
5964 accountingDiffTolerated(parser, tok, (char *)&ch,
5965 ((char *)&ch) + sizeof(XML_Char), __LINE__,
5966 XML_ACCOUNT_ENTITY_EXPANSION);
5967 #endif /* XML_DTD */
5968 if (! poolAppendChar(pool, ch))
5969 return XML_ERROR_NO_MEMORY;
5970 break;
5972 name = poolStoreString(&parser->m_temp2Pool, enc,
5973 ptr + enc->minBytesPerChar,
5974 next - enc->minBytesPerChar);
5975 if (! name)
5976 return XML_ERROR_NO_MEMORY;
5977 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5978 poolDiscard(&parser->m_temp2Pool);
5979 /* First, determine if a check for an existing declaration is needed;
5980 if yes, check that the entity exists, and that it is internal.
5982 if (pool == &dtd->pool) /* are we called from prolog? */
5983 checkEntityDecl =
5984 #ifdef XML_DTD
5985 parser->m_prologState.documentEntity &&
5986 #endif /* XML_DTD */
5987 (dtd->standalone ? ! parser->m_openInternalEntities
5988 : ! dtd->hasParamEntityRefs);
5989 else /* if (pool == &parser->m_tempPool): we are called from content */
5990 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5991 if (checkEntityDecl) {
5992 if (! entity)
5993 return XML_ERROR_UNDEFINED_ENTITY;
5994 else if (! entity->is_internal)
5995 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5996 } else if (! entity) {
5997 /* Cannot report skipped entity here - see comments on
5998 parser->m_skippedEntityHandler.
5999 if (parser->m_skippedEntityHandler)
6000 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6002 /* Cannot call the default handler because this would be
6003 out of sync with the call to the startElementHandler.
6004 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6005 reportDefault(parser, enc, ptr, next);
6007 break;
6009 if (entity->open) {
6010 if (enc == parser->m_encoding) {
6011 /* It does not appear that this line can be executed.
6013 * The "if (entity->open)" check catches recursive entity
6014 * definitions. In order to be called with an open
6015 * entity, it must have gone through this code before and
6016 * been through the recursive call to
6017 * appendAttributeValue() some lines below. That call
6018 * sets the local encoding ("enc") to the parser's
6019 * internal encoding (internal_utf8 or internal_utf16),
6020 * which can never be the same as the principle encoding.
6021 * It doesn't appear there is another code path that gets
6022 * here with entity->open being TRUE.
6024 * Since it is not certain that this logic is watertight,
6025 * we keep the line and merely exclude it from coverage
6026 * tests.
6028 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6030 return XML_ERROR_RECURSIVE_ENTITY_REF;
6032 if (entity->notation) {
6033 if (enc == parser->m_encoding)
6034 parser->m_eventPtr = ptr;
6035 return XML_ERROR_BINARY_ENTITY_REF;
6037 if (! entity->textPtr) {
6038 if (enc == parser->m_encoding)
6039 parser->m_eventPtr = ptr;
6040 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6041 } else {
6042 enum XML_Error result;
6043 const XML_Char *textEnd = entity->textPtr + entity->textLen;
6044 entity->open = XML_TRUE;
6045 #ifdef XML_DTD
6046 entityTrackingOnOpen(parser, entity, __LINE__);
6047 #endif
6048 result = appendAttributeValue(parser, parser->m_internalEncoding,
6049 isCdata, (const char *)entity->textPtr,
6050 (const char *)textEnd, pool,
6051 XML_ACCOUNT_ENTITY_EXPANSION);
6052 #ifdef XML_DTD
6053 entityTrackingOnClose(parser, entity, __LINE__);
6054 #endif
6055 entity->open = XML_FALSE;
6056 if (result)
6057 return result;
6059 } break;
6060 default:
6061 /* The only token returned by XmlAttributeValueTok() that does
6062 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6063 * Getting that would require an entity name to contain an
6064 * incomplete XML character (e.g. \xE2\x82); however previous
6065 * tokenisers will have already recognised and rejected such
6066 * names before XmlAttributeValueTok() gets a look-in. This
6067 * default case should be retained as a safety net, but the code
6068 * excluded from coverage tests.
6070 * LCOV_EXCL_START
6072 if (enc == parser->m_encoding)
6073 parser->m_eventPtr = ptr;
6074 return XML_ERROR_UNEXPECTED_STATE;
6075 /* LCOV_EXCL_STOP */
6077 ptr = next;
6079 /* not reached */
6082 static enum XML_Error
6083 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6084 const char *entityTextPtr, const char *entityTextEnd,
6085 enum XML_Account account) {
6086 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6087 STRING_POOL *pool = &(dtd->entityValuePool);
6088 enum XML_Error result = XML_ERROR_NONE;
6089 #ifdef XML_DTD
6090 int oldInEntityValue = parser->m_prologState.inEntityValue;
6091 parser->m_prologState.inEntityValue = 1;
6092 #else
6093 UNUSED_P(account);
6094 #endif /* XML_DTD */
6095 /* never return Null for the value argument in EntityDeclHandler,
6096 since this would indicate an external entity; therefore we
6097 have to make sure that entityValuePool.start is not null */
6098 if (! pool->blocks) {
6099 if (! poolGrow(pool))
6100 return XML_ERROR_NO_MEMORY;
6103 for (;;) {
6104 const char *next
6105 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6106 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6108 #ifdef XML_DTD
6109 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6110 account)) {
6111 accountingOnAbort(parser);
6112 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6113 goto endEntityValue;
6115 #endif
6117 switch (tok) {
6118 case XML_TOK_PARAM_ENTITY_REF:
6119 #ifdef XML_DTD
6120 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6121 const XML_Char *name;
6122 ENTITY *entity;
6123 name = poolStoreString(&parser->m_tempPool, enc,
6124 entityTextPtr + enc->minBytesPerChar,
6125 next - enc->minBytesPerChar);
6126 if (! name) {
6127 result = XML_ERROR_NO_MEMORY;
6128 goto endEntityValue;
6130 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6131 poolDiscard(&parser->m_tempPool);
6132 if (! entity) {
6133 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6134 /* cannot report skipped entity here - see comments on
6135 parser->m_skippedEntityHandler
6136 if (parser->m_skippedEntityHandler)
6137 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6139 dtd->keepProcessing = dtd->standalone;
6140 goto endEntityValue;
6142 if (entity->open) {
6143 if (enc == parser->m_encoding)
6144 parser->m_eventPtr = entityTextPtr;
6145 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6146 goto endEntityValue;
6148 if (entity->systemId) {
6149 if (parser->m_externalEntityRefHandler) {
6150 dtd->paramEntityRead = XML_FALSE;
6151 entity->open = XML_TRUE;
6152 entityTrackingOnOpen(parser, entity, __LINE__);
6153 if (! parser->m_externalEntityRefHandler(
6154 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6155 entity->systemId, entity->publicId)) {
6156 entityTrackingOnClose(parser, entity, __LINE__);
6157 entity->open = XML_FALSE;
6158 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6159 goto endEntityValue;
6161 entityTrackingOnClose(parser, entity, __LINE__);
6162 entity->open = XML_FALSE;
6163 if (! dtd->paramEntityRead)
6164 dtd->keepProcessing = dtd->standalone;
6165 } else
6166 dtd->keepProcessing = dtd->standalone;
6167 } else {
6168 entity->open = XML_TRUE;
6169 entityTrackingOnOpen(parser, entity, __LINE__);
6170 result = storeEntityValue(
6171 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6172 (const char *)(entity->textPtr + entity->textLen),
6173 XML_ACCOUNT_ENTITY_EXPANSION);
6174 entityTrackingOnClose(parser, entity, __LINE__);
6175 entity->open = XML_FALSE;
6176 if (result)
6177 goto endEntityValue;
6179 break;
6181 #endif /* XML_DTD */
6182 /* In the internal subset, PE references are not legal
6183 within markup declarations, e.g entity values in this case. */
6184 parser->m_eventPtr = entityTextPtr;
6185 result = XML_ERROR_PARAM_ENTITY_REF;
6186 goto endEntityValue;
6187 case XML_TOK_NONE:
6188 result = XML_ERROR_NONE;
6189 goto endEntityValue;
6190 case XML_TOK_ENTITY_REF:
6191 case XML_TOK_DATA_CHARS:
6192 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6193 result = XML_ERROR_NO_MEMORY;
6194 goto endEntityValue;
6196 break;
6197 case XML_TOK_TRAILING_CR:
6198 next = entityTextPtr + enc->minBytesPerChar;
6199 /* fall through */
6200 case XML_TOK_DATA_NEWLINE:
6201 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6202 result = XML_ERROR_NO_MEMORY;
6203 goto endEntityValue;
6205 *(pool->ptr)++ = 0xA;
6206 break;
6207 case XML_TOK_CHAR_REF: {
6208 XML_Char buf[XML_ENCODE_MAX];
6209 int i;
6210 int n = XmlCharRefNumber(enc, entityTextPtr);
6211 if (n < 0) {
6212 if (enc == parser->m_encoding)
6213 parser->m_eventPtr = entityTextPtr;
6214 result = XML_ERROR_BAD_CHAR_REF;
6215 goto endEntityValue;
6217 n = XmlEncode(n, (ICHAR *)buf);
6218 /* The XmlEncode() functions can never return 0 here. That
6219 * error return happens if the code point passed in is either
6220 * negative or greater than or equal to 0x110000. The
6221 * XmlCharRefNumber() functions will all return a number
6222 * strictly less than 0x110000 or a negative value if an error
6223 * occurred. The negative value is intercepted above, so
6224 * XmlEncode() is never passed a value it might return an
6225 * error for.
6227 for (i = 0; i < n; i++) {
6228 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6229 result = XML_ERROR_NO_MEMORY;
6230 goto endEntityValue;
6232 *(pool->ptr)++ = buf[i];
6234 } break;
6235 case XML_TOK_PARTIAL:
6236 if (enc == parser->m_encoding)
6237 parser->m_eventPtr = entityTextPtr;
6238 result = XML_ERROR_INVALID_TOKEN;
6239 goto endEntityValue;
6240 case XML_TOK_INVALID:
6241 if (enc == parser->m_encoding)
6242 parser->m_eventPtr = next;
6243 result = XML_ERROR_INVALID_TOKEN;
6244 goto endEntityValue;
6245 default:
6246 /* This default case should be unnecessary -- all the tokens
6247 * that XmlEntityValueTok() can return have their own explicit
6248 * cases -- but should be retained for safety. We do however
6249 * exclude it from the coverage statistics.
6251 * LCOV_EXCL_START
6253 if (enc == parser->m_encoding)
6254 parser->m_eventPtr = entityTextPtr;
6255 result = XML_ERROR_UNEXPECTED_STATE;
6256 goto endEntityValue;
6257 /* LCOV_EXCL_STOP */
6259 entityTextPtr = next;
6261 endEntityValue:
6262 #ifdef XML_DTD
6263 parser->m_prologState.inEntityValue = oldInEntityValue;
6264 #endif /* XML_DTD */
6265 return result;
6268 static void FASTCALL
6269 normalizeLines(XML_Char *s) {
6270 XML_Char *p;
6271 for (;; s++) {
6272 if (*s == XML_T('\0'))
6273 return;
6274 if (*s == 0xD)
6275 break;
6277 p = s;
6278 do {
6279 if (*s == 0xD) {
6280 *p++ = 0xA;
6281 if (*++s == 0xA)
6282 s++;
6283 } else
6284 *p++ = *s++;
6285 } while (*s);
6286 *p = XML_T('\0');
6289 static int
6290 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6291 const char *start, const char *end) {
6292 const XML_Char *target;
6293 XML_Char *data;
6294 const char *tem;
6295 if (! parser->m_processingInstructionHandler) {
6296 if (parser->m_defaultHandler)
6297 reportDefault(parser, enc, start, end);
6298 return 1;
6300 start += enc->minBytesPerChar * 2;
6301 tem = start + XmlNameLength(enc, start);
6302 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6303 if (! target)
6304 return 0;
6305 poolFinish(&parser->m_tempPool);
6306 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6307 end - enc->minBytesPerChar * 2);
6308 if (! data)
6309 return 0;
6310 normalizeLines(data);
6311 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6312 poolClear(&parser->m_tempPool);
6313 return 1;
6316 static int
6317 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6318 const char *end) {
6319 XML_Char *data;
6320 if (! parser->m_commentHandler) {
6321 if (parser->m_defaultHandler)
6322 reportDefault(parser, enc, start, end);
6323 return 1;
6325 data = poolStoreString(&parser->m_tempPool, enc,
6326 start + enc->minBytesPerChar * 4,
6327 end - enc->minBytesPerChar * 3);
6328 if (! data)
6329 return 0;
6330 normalizeLines(data);
6331 parser->m_commentHandler(parser->m_handlerArg, data);
6332 poolClear(&parser->m_tempPool);
6333 return 1;
6336 static void
6337 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6338 const char *end) {
6339 if (MUST_CONVERT(enc, s)) {
6340 enum XML_Convert_Result convert_res;
6341 const char **eventPP;
6342 const char **eventEndPP;
6343 if (enc == parser->m_encoding) {
6344 eventPP = &parser->m_eventPtr;
6345 eventEndPP = &parser->m_eventEndPtr;
6346 } else {
6347 /* To get here, two things must be true; the parser must be
6348 * using a character encoding that is not the same as the
6349 * encoding passed in, and the encoding passed in must need
6350 * conversion to the internal format (UTF-8 unless XML_UNICODE
6351 * is defined). The only occasions on which the encoding passed
6352 * in is not the same as the parser's encoding are when it is
6353 * the internal encoding (e.g. a previously defined parameter
6354 * entity, already converted to internal format). This by
6355 * definition doesn't need conversion, so the whole branch never
6356 * gets executed.
6358 * For safety's sake we don't delete these lines and merely
6359 * exclude them from coverage statistics.
6361 * LCOV_EXCL_START
6363 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6364 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6365 /* LCOV_EXCL_STOP */
6367 do {
6368 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6369 convert_res
6370 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6371 *eventEndPP = s;
6372 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6373 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6374 *eventPP = s;
6375 } while ((convert_res != XML_CONVERT_COMPLETED)
6376 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6377 } else
6378 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
6379 (int)((XML_Char *)end - (XML_Char *)s));
6382 static int
6383 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6384 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6385 DEFAULT_ATTRIBUTE *att;
6386 if (value || isId) {
6387 /* The handling of default attributes gets messed up if we have
6388 a default which duplicates a non-default. */
6389 int i;
6390 for (i = 0; i < type->nDefaultAtts; i++)
6391 if (attId == type->defaultAtts[i].id)
6392 return 1;
6393 if (isId && ! type->idAtt && ! attId->xmlns)
6394 type->idAtt = attId;
6396 if (type->nDefaultAtts == type->allocDefaultAtts) {
6397 if (type->allocDefaultAtts == 0) {
6398 type->allocDefaultAtts = 8;
6399 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6400 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6401 if (! type->defaultAtts) {
6402 type->allocDefaultAtts = 0;
6403 return 0;
6405 } else {
6406 DEFAULT_ATTRIBUTE *temp;
6408 /* Detect and prevent integer overflow */
6409 if (type->allocDefaultAtts > INT_MAX / 2) {
6410 return 0;
6413 int count = type->allocDefaultAtts * 2;
6415 /* Detect and prevent integer overflow.
6416 * The preprocessor guard addresses the "always false" warning
6417 * from -Wtype-limits on platforms where
6418 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6419 #if UINT_MAX >= SIZE_MAX
6420 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6421 return 0;
6423 #endif
6425 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6426 (count * sizeof(DEFAULT_ATTRIBUTE)));
6427 if (temp == NULL)
6428 return 0;
6429 type->allocDefaultAtts = count;
6430 type->defaultAtts = temp;
6433 att = type->defaultAtts + type->nDefaultAtts;
6434 att->id = attId;
6435 att->value = value;
6436 att->isCdata = isCdata;
6437 if (! isCdata)
6438 attId->maybeTokenized = XML_TRUE;
6439 type->nDefaultAtts += 1;
6440 return 1;
6443 static int
6444 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6445 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6446 const XML_Char *name;
6447 for (name = elementType->name; *name; name++) {
6448 if (*name == XML_T(ASCII_COLON)) {
6449 PREFIX *prefix;
6450 const XML_Char *s;
6451 for (s = elementType->name; s != name; s++) {
6452 if (! poolAppendChar(&dtd->pool, *s))
6453 return 0;
6455 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6456 return 0;
6457 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6458 sizeof(PREFIX));
6459 if (! prefix)
6460 return 0;
6461 if (prefix->name == poolStart(&dtd->pool))
6462 poolFinish(&dtd->pool);
6463 else
6464 poolDiscard(&dtd->pool);
6465 elementType->prefix = prefix;
6466 break;
6469 return 1;
6472 static ATTRIBUTE_ID *
6473 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6474 const char *end) {
6475 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6476 ATTRIBUTE_ID *id;
6477 const XML_Char *name;
6478 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6479 return NULL;
6480 name = poolStoreString(&dtd->pool, enc, start, end);
6481 if (! name)
6482 return NULL;
6483 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6484 ++name;
6485 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6486 sizeof(ATTRIBUTE_ID));
6487 if (! id)
6488 return NULL;
6489 if (id->name != name)
6490 poolDiscard(&dtd->pool);
6491 else {
6492 poolFinish(&dtd->pool);
6493 if (! parser->m_ns)
6495 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6496 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6497 && name[4] == XML_T(ASCII_s)
6498 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6499 if (name[5] == XML_T('\0'))
6500 id->prefix = &dtd->defaultPrefix;
6501 else
6502 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6503 sizeof(PREFIX));
6504 id->xmlns = XML_TRUE;
6505 } else {
6506 int i;
6507 for (i = 0; name[i]; i++) {
6508 /* attributes without prefix are *not* in the default namespace */
6509 if (name[i] == XML_T(ASCII_COLON)) {
6510 int j;
6511 for (j = 0; j < i; j++) {
6512 if (! poolAppendChar(&dtd->pool, name[j]))
6513 return NULL;
6515 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6516 return NULL;
6517 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6518 poolStart(&dtd->pool), sizeof(PREFIX));
6519 if (! id->prefix)
6520 return NULL;
6521 if (id->prefix->name == poolStart(&dtd->pool))
6522 poolFinish(&dtd->pool);
6523 else
6524 poolDiscard(&dtd->pool);
6525 break;
6530 return id;
6533 #define CONTEXT_SEP XML_T(ASCII_FF)
6535 static const XML_Char *
6536 getContext(XML_Parser parser) {
6537 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6538 HASH_TABLE_ITER iter;
6539 XML_Bool needSep = XML_FALSE;
6541 if (dtd->defaultPrefix.binding) {
6542 int i;
6543 int len;
6544 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6545 return NULL;
6546 len = dtd->defaultPrefix.binding->uriLen;
6547 if (parser->m_namespaceSeparator)
6548 len--;
6549 for (i = 0; i < len; i++) {
6550 if (! poolAppendChar(&parser->m_tempPool,
6551 dtd->defaultPrefix.binding->uri[i])) {
6552 /* Because of memory caching, I don't believe this line can be
6553 * executed.
6555 * This is part of a loop copying the default prefix binding
6556 * URI into the parser's temporary string pool. Previously,
6557 * that URI was copied into the same string pool, with a
6558 * terminating NUL character, as part of setContext(). When
6559 * the pool was cleared, that leaves a block definitely big
6560 * enough to hold the URI on the free block list of the pool.
6561 * The URI copy in getContext() therefore cannot run out of
6562 * memory.
6564 * If the pool is used between the setContext() and
6565 * getContext() calls, the worst it can do is leave a bigger
6566 * block on the front of the free list. Given that this is
6567 * all somewhat inobvious and program logic can be changed, we
6568 * don't delete the line but we do exclude it from the test
6569 * coverage statistics.
6571 return NULL; /* LCOV_EXCL_LINE */
6574 needSep = XML_TRUE;
6577 hashTableIterInit(&iter, &(dtd->prefixes));
6578 for (;;) {
6579 int i;
6580 int len;
6581 const XML_Char *s;
6582 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6583 if (! prefix)
6584 break;
6585 if (! prefix->binding) {
6586 /* This test appears to be (justifiable) paranoia. There does
6587 * not seem to be a way of injecting a prefix without a binding
6588 * that doesn't get errored long before this function is called.
6589 * The test should remain for safety's sake, so we instead
6590 * exclude the following line from the coverage statistics.
6592 continue; /* LCOV_EXCL_LINE */
6594 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6595 return NULL;
6596 for (s = prefix->name; *s; s++)
6597 if (! poolAppendChar(&parser->m_tempPool, *s))
6598 return NULL;
6599 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6600 return NULL;
6601 len = prefix->binding->uriLen;
6602 if (parser->m_namespaceSeparator)
6603 len--;
6604 for (i = 0; i < len; i++)
6605 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6606 return NULL;
6607 needSep = XML_TRUE;
6610 hashTableIterInit(&iter, &(dtd->generalEntities));
6611 for (;;) {
6612 const XML_Char *s;
6613 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6614 if (! e)
6615 break;
6616 if (! e->open)
6617 continue;
6618 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6619 return NULL;
6620 for (s = e->name; *s; s++)
6621 if (! poolAppendChar(&parser->m_tempPool, *s))
6622 return 0;
6623 needSep = XML_TRUE;
6626 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6627 return NULL;
6628 return parser->m_tempPool.start;
6631 static XML_Bool
6632 setContext(XML_Parser parser, const XML_Char *context) {
6633 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6634 const XML_Char *s = context;
6636 while (*context != XML_T('\0')) {
6637 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6638 ENTITY *e;
6639 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6640 return XML_FALSE;
6641 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6642 poolStart(&parser->m_tempPool), 0);
6643 if (e)
6644 e->open = XML_TRUE;
6645 if (*s != XML_T('\0'))
6646 s++;
6647 context = s;
6648 poolDiscard(&parser->m_tempPool);
6649 } else if (*s == XML_T(ASCII_EQUALS)) {
6650 PREFIX *prefix;
6651 if (poolLength(&parser->m_tempPool) == 0)
6652 prefix = &dtd->defaultPrefix;
6653 else {
6654 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6655 return XML_FALSE;
6656 prefix
6657 = (PREFIX *)lookup(parser, &dtd->prefixes,
6658 poolStart(&parser->m_tempPool), sizeof(PREFIX));
6659 if (! prefix)
6660 return XML_FALSE;
6661 if (prefix->name == poolStart(&parser->m_tempPool)) {
6662 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6663 if (! prefix->name)
6664 return XML_FALSE;
6666 poolDiscard(&parser->m_tempPool);
6668 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6669 context++)
6670 if (! poolAppendChar(&parser->m_tempPool, *context))
6671 return XML_FALSE;
6672 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6673 return XML_FALSE;
6674 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6675 &parser->m_inheritedBindings)
6676 != XML_ERROR_NONE)
6677 return XML_FALSE;
6678 poolDiscard(&parser->m_tempPool);
6679 if (*context != XML_T('\0'))
6680 ++context;
6681 s = context;
6682 } else {
6683 if (! poolAppendChar(&parser->m_tempPool, *s))
6684 return XML_FALSE;
6685 s++;
6688 return XML_TRUE;
6691 static void FASTCALL
6692 normalizePublicId(XML_Char *publicId) {
6693 XML_Char *p = publicId;
6694 XML_Char *s;
6695 for (s = publicId; *s; s++) {
6696 switch (*s) {
6697 case 0x20:
6698 case 0xD:
6699 case 0xA:
6700 if (p != publicId && p[-1] != 0x20)
6701 *p++ = 0x20;
6702 break;
6703 default:
6704 *p++ = *s;
6707 if (p != publicId && p[-1] == 0x20)
6708 --p;
6709 *p = XML_T('\0');
6712 static DTD *
6713 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6714 DTD *p = ms->malloc_fcn(sizeof(DTD));
6715 if (p == NULL)
6716 return p;
6717 poolInit(&(p->pool), ms);
6718 poolInit(&(p->entityValuePool), ms);
6719 hashTableInit(&(p->generalEntities), ms);
6720 hashTableInit(&(p->elementTypes), ms);
6721 hashTableInit(&(p->attributeIds), ms);
6722 hashTableInit(&(p->prefixes), ms);
6723 #ifdef XML_DTD
6724 p->paramEntityRead = XML_FALSE;
6725 hashTableInit(&(p->paramEntities), ms);
6726 #endif /* XML_DTD */
6727 p->defaultPrefix.name = NULL;
6728 p->defaultPrefix.binding = NULL;
6730 p->in_eldecl = XML_FALSE;
6731 p->scaffIndex = NULL;
6732 p->scaffold = NULL;
6733 p->scaffLevel = 0;
6734 p->scaffSize = 0;
6735 p->scaffCount = 0;
6736 p->contentStringLen = 0;
6738 p->keepProcessing = XML_TRUE;
6739 p->hasParamEntityRefs = XML_FALSE;
6740 p->standalone = XML_FALSE;
6741 return p;
6744 static void
6745 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6746 HASH_TABLE_ITER iter;
6747 hashTableIterInit(&iter, &(p->elementTypes));
6748 for (;;) {
6749 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6750 if (! e)
6751 break;
6752 if (e->allocDefaultAtts != 0)
6753 ms->free_fcn(e->defaultAtts);
6755 hashTableClear(&(p->generalEntities));
6756 #ifdef XML_DTD
6757 p->paramEntityRead = XML_FALSE;
6758 hashTableClear(&(p->paramEntities));
6759 #endif /* XML_DTD */
6760 hashTableClear(&(p->elementTypes));
6761 hashTableClear(&(p->attributeIds));
6762 hashTableClear(&(p->prefixes));
6763 poolClear(&(p->pool));
6764 poolClear(&(p->entityValuePool));
6765 p->defaultPrefix.name = NULL;
6766 p->defaultPrefix.binding = NULL;
6768 p->in_eldecl = XML_FALSE;
6770 ms->free_fcn(p->scaffIndex);
6771 p->scaffIndex = NULL;
6772 ms->free_fcn(p->scaffold);
6773 p->scaffold = NULL;
6775 p->scaffLevel = 0;
6776 p->scaffSize = 0;
6777 p->scaffCount = 0;
6778 p->contentStringLen = 0;
6780 p->keepProcessing = XML_TRUE;
6781 p->hasParamEntityRefs = XML_FALSE;
6782 p->standalone = XML_FALSE;
6785 static void
6786 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6787 HASH_TABLE_ITER iter;
6788 hashTableIterInit(&iter, &(p->elementTypes));
6789 for (;;) {
6790 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6791 if (! e)
6792 break;
6793 if (e->allocDefaultAtts != 0)
6794 ms->free_fcn(e->defaultAtts);
6796 hashTableDestroy(&(p->generalEntities));
6797 #ifdef XML_DTD
6798 hashTableDestroy(&(p->paramEntities));
6799 #endif /* XML_DTD */
6800 hashTableDestroy(&(p->elementTypes));
6801 hashTableDestroy(&(p->attributeIds));
6802 hashTableDestroy(&(p->prefixes));
6803 poolDestroy(&(p->pool));
6804 poolDestroy(&(p->entityValuePool));
6805 if (isDocEntity) {
6806 ms->free_fcn(p->scaffIndex);
6807 ms->free_fcn(p->scaffold);
6809 ms->free_fcn(p);
6812 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6813 The new DTD has already been initialized.
6815 static int
6816 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6817 const XML_Memory_Handling_Suite *ms) {
6818 HASH_TABLE_ITER iter;
6820 /* Copy the prefix table. */
6822 hashTableIterInit(&iter, &(oldDtd->prefixes));
6823 for (;;) {
6824 const XML_Char *name;
6825 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6826 if (! oldP)
6827 break;
6828 name = poolCopyString(&(newDtd->pool), oldP->name);
6829 if (! name)
6830 return 0;
6831 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6832 return 0;
6835 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6837 /* Copy the attribute id table. */
6839 for (;;) {
6840 ATTRIBUTE_ID *newA;
6841 const XML_Char *name;
6842 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6844 if (! oldA)
6845 break;
6846 /* Remember to allocate the scratch byte before the name. */
6847 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6848 return 0;
6849 name = poolCopyString(&(newDtd->pool), oldA->name);
6850 if (! name)
6851 return 0;
6852 ++name;
6853 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6854 sizeof(ATTRIBUTE_ID));
6855 if (! newA)
6856 return 0;
6857 newA->maybeTokenized = oldA->maybeTokenized;
6858 if (oldA->prefix) {
6859 newA->xmlns = oldA->xmlns;
6860 if (oldA->prefix == &oldDtd->defaultPrefix)
6861 newA->prefix = &newDtd->defaultPrefix;
6862 else
6863 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6864 oldA->prefix->name, 0);
6868 /* Copy the element type table. */
6870 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6872 for (;;) {
6873 int i;
6874 ELEMENT_TYPE *newE;
6875 const XML_Char *name;
6876 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6877 if (! oldE)
6878 break;
6879 name = poolCopyString(&(newDtd->pool), oldE->name);
6880 if (! name)
6881 return 0;
6882 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6883 sizeof(ELEMENT_TYPE));
6884 if (! newE)
6885 return 0;
6886 if (oldE->nDefaultAtts) {
6887 newE->defaultAtts
6888 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6889 if (! newE->defaultAtts) {
6890 return 0;
6893 if (oldE->idAtt)
6894 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6895 oldE->idAtt->name, 0);
6896 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6897 if (oldE->prefix)
6898 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6899 oldE->prefix->name, 0);
6900 for (i = 0; i < newE->nDefaultAtts; i++) {
6901 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6902 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6903 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6904 if (oldE->defaultAtts[i].value) {
6905 newE->defaultAtts[i].value
6906 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6907 if (! newE->defaultAtts[i].value)
6908 return 0;
6909 } else
6910 newE->defaultAtts[i].value = NULL;
6914 /* Copy the entity tables. */
6915 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6916 &(oldDtd->generalEntities)))
6917 return 0;
6919 #ifdef XML_DTD
6920 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6921 &(oldDtd->paramEntities)))
6922 return 0;
6923 newDtd->paramEntityRead = oldDtd->paramEntityRead;
6924 #endif /* XML_DTD */
6926 newDtd->keepProcessing = oldDtd->keepProcessing;
6927 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6928 newDtd->standalone = oldDtd->standalone;
6930 /* Don't want deep copying for scaffolding */
6931 newDtd->in_eldecl = oldDtd->in_eldecl;
6932 newDtd->scaffold = oldDtd->scaffold;
6933 newDtd->contentStringLen = oldDtd->contentStringLen;
6934 newDtd->scaffSize = oldDtd->scaffSize;
6935 newDtd->scaffLevel = oldDtd->scaffLevel;
6936 newDtd->scaffIndex = oldDtd->scaffIndex;
6938 return 1;
6939 } /* End dtdCopy */
6941 static int
6942 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6943 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
6944 HASH_TABLE_ITER iter;
6945 const XML_Char *cachedOldBase = NULL;
6946 const XML_Char *cachedNewBase = NULL;
6948 hashTableIterInit(&iter, oldTable);
6950 for (;;) {
6951 ENTITY *newE;
6952 const XML_Char *name;
6953 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6954 if (! oldE)
6955 break;
6956 name = poolCopyString(newPool, oldE->name);
6957 if (! name)
6958 return 0;
6959 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6960 if (! newE)
6961 return 0;
6962 if (oldE->systemId) {
6963 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6964 if (! tem)
6965 return 0;
6966 newE->systemId = tem;
6967 if (oldE->base) {
6968 if (oldE->base == cachedOldBase)
6969 newE->base = cachedNewBase;
6970 else {
6971 cachedOldBase = oldE->base;
6972 tem = poolCopyString(newPool, cachedOldBase);
6973 if (! tem)
6974 return 0;
6975 cachedNewBase = newE->base = tem;
6978 if (oldE->publicId) {
6979 tem = poolCopyString(newPool, oldE->publicId);
6980 if (! tem)
6981 return 0;
6982 newE->publicId = tem;
6984 } else {
6985 const XML_Char *tem
6986 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6987 if (! tem)
6988 return 0;
6989 newE->textPtr = tem;
6990 newE->textLen = oldE->textLen;
6992 if (oldE->notation) {
6993 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6994 if (! tem)
6995 return 0;
6996 newE->notation = tem;
6998 newE->is_param = oldE->is_param;
6999 newE->is_internal = oldE->is_internal;
7001 return 1;
/* log2 of the initial number of slots of a hash table: lookup() creates
 * a table with (1 << INIT_POWER) slots on first insertion. */
#define INIT_POWER 6
7006 static XML_Bool FASTCALL
7007 keyeq(KEY s1, KEY s2) {
7008 for (; *s1 == *s2; s1++, s2++)
7009 if (*s1 == 0)
7010 return XML_TRUE;
7011 return XML_FALSE;
7014 static size_t
7015 keylen(KEY s) {
7016 size_t len = 0;
7017 for (; *s; s++, len++)
7019 return len;
7022 static void
7023 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7024 key->k[0] = 0;
7025 key->k[1] = get_hash_secret_salt(parser);
7028 static unsigned long FASTCALL
7029 hash(XML_Parser parser, KEY s) {
7030 struct siphash state;
7031 struct sipkey key;
7032 (void)sip24_valid;
7033 copy_salt_to_sipkey(parser, &key);
7034 sip24_init(&state, &key);
7035 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7036 return (unsigned long)sip24_final(&state);
7039 static NAMED *
7040 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
7041 size_t i;
7042 if (table->size == 0) {
7043 size_t tsize;
7044 if (! createSize)
7045 return NULL;
7046 table->power = INIT_POWER;
7047 /* table->size is a power of 2 */
7048 table->size = (size_t)1 << INIT_POWER;
7049 tsize = table->size * sizeof(NAMED *);
7050 table->v = table->mem->malloc_fcn(tsize);
7051 if (! table->v) {
7052 table->size = 0;
7053 return NULL;
7055 memset(table->v, 0, tsize);
7056 i = hash(parser, name) & ((unsigned long)table->size - 1);
7057 } else {
7058 unsigned long h = hash(parser, name);
7059 unsigned long mask = (unsigned long)table->size - 1;
7060 unsigned char step = 0;
7061 i = h & mask;
7062 while (table->v[i]) {
7063 if (keyeq(name, table->v[i]->name))
7064 return table->v[i];
7065 if (! step)
7066 step = PROBE_STEP(h, mask, table->power);
7067 i < step ? (i += table->size - step) : (i -= step);
7069 if (! createSize)
7070 return NULL;
7072 /* check for overflow (table is half full) */
7073 if (table->used >> (table->power - 1)) {
7074 unsigned char newPower = table->power + 1;
7076 /* Detect and prevent invalid shift */
7077 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
7078 return NULL;
7081 size_t newSize = (size_t)1 << newPower;
7082 unsigned long newMask = (unsigned long)newSize - 1;
7084 /* Detect and prevent integer overflow */
7085 if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
7086 return NULL;
7089 size_t tsize = newSize * sizeof(NAMED *);
7090 NAMED **newV = table->mem->malloc_fcn(tsize);
7091 if (! newV)
7092 return NULL;
7093 memset(newV, 0, tsize);
7094 for (i = 0; i < table->size; i++)
7095 if (table->v[i]) {
7096 unsigned long newHash = hash(parser, table->v[i]->name);
7097 size_t j = newHash & newMask;
7098 step = 0;
7099 while (newV[j]) {
7100 if (! step)
7101 step = PROBE_STEP(newHash, newMask, newPower);
7102 j < step ? (j += newSize - step) : (j -= step);
7104 newV[j] = table->v[i];
7106 table->mem->free_fcn(table->v);
7107 table->v = newV;
7108 table->power = newPower;
7109 table->size = newSize;
7110 i = h & newMask;
7111 step = 0;
7112 while (table->v[i]) {
7113 if (! step)
7114 step = PROBE_STEP(h, newMask, newPower);
7115 i < step ? (i += newSize - step) : (i -= step);
7119 table->v[i] = table->mem->malloc_fcn(createSize);
7120 if (! table->v[i])
7121 return NULL;
7122 memset(table->v[i], 0, createSize);
7123 table->v[i]->name = name;
7124 (table->used)++;
7125 return table->v[i];
7128 static void FASTCALL
7129 hashTableClear(HASH_TABLE *table) {
7130 size_t i;
7131 for (i = 0; i < table->size; i++) {
7132 table->mem->free_fcn(table->v[i]);
7133 table->v[i] = NULL;
7135 table->used = 0;
7138 static void FASTCALL
7139 hashTableDestroy(HASH_TABLE *table) {
7140 size_t i;
7141 for (i = 0; i < table->size; i++)
7142 table->mem->free_fcn(table->v[i]);
7143 table->mem->free_fcn(table->v);
7146 static void FASTCALL
7147 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7148 p->power = 0;
7149 p->size = 0;
7150 p->used = 0;
7151 p->v = NULL;
7152 p->mem = ms;
7155 static void FASTCALL
7156 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7157 iter->p = table->v;
7158 iter->end = iter->p ? iter->p + table->size : NULL;
7161 static NAMED *FASTCALL
7162 hashTableIterNext(HASH_TABLE_ITER *iter) {
7163 while (iter->p != iter->end) {
7164 NAMED *tem = *(iter->p)++;
7165 if (tem)
7166 return tem;
7168 return NULL;
7171 static void FASTCALL
7172 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7173 pool->blocks = NULL;
7174 pool->freeBlocks = NULL;
7175 pool->start = NULL;
7176 pool->ptr = NULL;
7177 pool->end = NULL;
7178 pool->mem = ms;
7181 static void FASTCALL
7182 poolClear(STRING_POOL *pool) {
7183 if (! pool->freeBlocks)
7184 pool->freeBlocks = pool->blocks;
7185 else {
7186 BLOCK *p = pool->blocks;
7187 while (p) {
7188 BLOCK *tem = p->next;
7189 p->next = pool->freeBlocks;
7190 pool->freeBlocks = p;
7191 p = tem;
7194 pool->blocks = NULL;
7195 pool->start = NULL;
7196 pool->ptr = NULL;
7197 pool->end = NULL;
7200 static void FASTCALL
7201 poolDestroy(STRING_POOL *pool) {
7202 BLOCK *p = pool->blocks;
7203 while (p) {
7204 BLOCK *tem = p->next;
7205 pool->mem->free_fcn(p);
7206 p = tem;
7208 p = pool->freeBlocks;
7209 while (p) {
7210 BLOCK *tem = p->next;
7211 pool->mem->free_fcn(p);
7212 p = tem;
7216 static XML_Char *
7217 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7218 const char *end) {
7219 if (! pool->ptr && ! poolGrow(pool))
7220 return NULL;
7221 for (;;) {
7222 const enum XML_Convert_Result convert_res = XmlConvert(
7223 enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
7224 if ((convert_res == XML_CONVERT_COMPLETED)
7225 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7226 break;
7227 if (! poolGrow(pool))
7228 return NULL;
7230 return pool->start;
7233 static const XML_Char *FASTCALL
7234 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7235 do {
7236 if (! poolAppendChar(pool, *s))
7237 return NULL;
7238 } while (*s++);
7239 s = pool->start;
7240 poolFinish(pool);
7241 return s;
7244 static const XML_Char *
7245 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7246 if (! pool->ptr && ! poolGrow(pool)) {
7247 /* The following line is unreachable given the current usage of
7248 * poolCopyStringN(). Currently it is called from exactly one
7249 * place to copy the text of a simple general entity. By that
7250 * point, the name of the entity is already stored in the pool, so
7251 * pool->ptr cannot be NULL.
7253 * If poolCopyStringN() is used elsewhere as it well might be,
7254 * this line may well become executable again. Regardless, this
7255 * sort of check shouldn't be removed lightly, so we just exclude
7256 * it from the coverage statistics.
7258 return NULL; /* LCOV_EXCL_LINE */
7260 for (; n > 0; --n, s++) {
7261 if (! poolAppendChar(pool, *s))
7262 return NULL;
7264 s = pool->start;
7265 poolFinish(pool);
7266 return s;
7269 static const XML_Char *FASTCALL
7270 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7271 while (*s) {
7272 if (! poolAppendChar(pool, *s))
7273 return NULL;
7274 s++;
7276 return pool->start;
7279 static XML_Char *
7280 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7281 const char *end) {
7282 if (! poolAppend(pool, enc, ptr, end))
7283 return NULL;
7284 if (pool->ptr == pool->end && ! poolGrow(pool))
7285 return NULL;
7286 *(pool->ptr)++ = 0;
7287 return pool->start;
7290 static size_t
7291 poolBytesToAllocateFor(int blockSize) {
7292 /* Unprotected math would be:
7293 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7295 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7296 ** For a + b * c we check b * c in isolation first, so that addition of a
7297 ** on top has no chance of making us accept a small non-negative number
7299 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7301 if (blockSize <= 0)
7302 return 0;
7304 if (blockSize > (int)(INT_MAX / stretch))
7305 return 0;
7308 const int stretchedBlockSize = blockSize * (int)stretch;
7309 const int bytesToAllocate
7310 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7311 if (bytesToAllocate < 0)
7312 return 0;
7314 return (size_t)bytesToAllocate;
7318 static XML_Bool FASTCALL
7319 poolGrow(STRING_POOL *pool) {
7320 if (pool->freeBlocks) {
7321 if (pool->start == 0) {
7322 pool->blocks = pool->freeBlocks;
7323 pool->freeBlocks = pool->freeBlocks->next;
7324 pool->blocks->next = NULL;
7325 pool->start = pool->blocks->s;
7326 pool->end = pool->start + pool->blocks->size;
7327 pool->ptr = pool->start;
7328 return XML_TRUE;
7330 if (pool->end - pool->start < pool->freeBlocks->size) {
7331 BLOCK *tem = pool->freeBlocks->next;
7332 pool->freeBlocks->next = pool->blocks;
7333 pool->blocks = pool->freeBlocks;
7334 pool->freeBlocks = tem;
7335 memcpy(pool->blocks->s, pool->start,
7336 (pool->end - pool->start) * sizeof(XML_Char));
7337 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7338 pool->start = pool->blocks->s;
7339 pool->end = pool->start + pool->blocks->size;
7340 return XML_TRUE;
7343 if (pool->blocks && pool->start == pool->blocks->s) {
7344 BLOCK *temp;
7345 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7346 size_t bytesToAllocate;
7348 /* NOTE: Needs to be calculated prior to calling `realloc`
7349 to avoid dangling pointers: */
7350 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7352 if (blockSize < 0) {
7353 /* This condition traps a situation where either more than
7354 * INT_MAX/2 bytes have already been allocated. This isn't
7355 * readily testable, since it is unlikely that an average
7356 * machine will have that much memory, so we exclude it from the
7357 * coverage statistics.
7359 return XML_FALSE; /* LCOV_EXCL_LINE */
7362 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7363 if (bytesToAllocate == 0)
7364 return XML_FALSE;
7366 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7367 (unsigned)bytesToAllocate);
7368 if (temp == NULL)
7369 return XML_FALSE;
7370 pool->blocks = temp;
7371 pool->blocks->size = blockSize;
7372 pool->ptr = pool->blocks->s + offsetInsideBlock;
7373 pool->start = pool->blocks->s;
7374 pool->end = pool->start + blockSize;
7375 } else {
7376 BLOCK *tem;
7377 int blockSize = (int)(pool->end - pool->start);
7378 size_t bytesToAllocate;
7380 if (blockSize < 0) {
7381 /* This condition traps a situation where either more than
7382 * INT_MAX bytes have already been allocated (which is prevented
7383 * by various pieces of program logic, not least this one, never
7384 * mind the unlikelihood of actually having that much memory) or
7385 * the pool control fields have been corrupted (which could
7386 * conceivably happen in an extremely buggy user handler
7387 * function). Either way it isn't readily testable, so we
7388 * exclude it from the coverage statistics.
7390 return XML_FALSE; /* LCOV_EXCL_LINE */
7393 if (blockSize < INIT_BLOCK_SIZE)
7394 blockSize = INIT_BLOCK_SIZE;
7395 else {
7396 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7397 if ((int)((unsigned)blockSize * 2U) < 0) {
7398 return XML_FALSE;
7400 blockSize *= 2;
7403 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7404 if (bytesToAllocate == 0)
7405 return XML_FALSE;
7407 tem = pool->mem->malloc_fcn(bytesToAllocate);
7408 if (! tem)
7409 return XML_FALSE;
7410 tem->size = blockSize;
7411 tem->next = pool->blocks;
7412 pool->blocks = tem;
7413 if (pool->ptr != pool->start)
7414 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7415 pool->ptr = tem->s + (pool->ptr - pool->start);
7416 pool->start = tem->s;
7417 pool->end = tem->s + blockSize;
7419 return XML_TRUE;
7422 static int FASTCALL
7423 nextScaffoldPart(XML_Parser parser) {
7424 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7425 CONTENT_SCAFFOLD *me;
7426 int next;
7428 if (! dtd->scaffIndex) {
7429 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7430 if (! dtd->scaffIndex)
7431 return -1;
7432 dtd->scaffIndex[0] = 0;
7435 if (dtd->scaffCount >= dtd->scaffSize) {
7436 CONTENT_SCAFFOLD *temp;
7437 if (dtd->scaffold) {
7438 /* Detect and prevent integer overflow */
7439 if (dtd->scaffSize > UINT_MAX / 2u) {
7440 return -1;
7442 /* Detect and prevent integer overflow.
7443 * The preprocessor guard addresses the "always false" warning
7444 * from -Wtype-limits on platforms where
7445 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7446 #if UINT_MAX >= SIZE_MAX
7447 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7448 return -1;
7450 #endif
7452 temp = (CONTENT_SCAFFOLD *)REALLOC(
7453 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7454 if (temp == NULL)
7455 return -1;
7456 dtd->scaffSize *= 2;
7457 } else {
7458 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7459 * sizeof(CONTENT_SCAFFOLD));
7460 if (temp == NULL)
7461 return -1;
7462 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7464 dtd->scaffold = temp;
7466 next = dtd->scaffCount++;
7467 me = &dtd->scaffold[next];
7468 if (dtd->scaffLevel) {
7469 CONTENT_SCAFFOLD *parent
7470 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7471 if (parent->lastchild) {
7472 dtd->scaffold[parent->lastchild].nextsib = next;
7474 if (! parent->childcnt)
7475 parent->firstchild = next;
7476 parent->lastchild = next;
7477 parent->childcnt++;
7479 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7480 return next;
7483 static XML_Content *
7484 build_model(XML_Parser parser) {
7485 /* Function build_model transforms the existing parser->m_dtd->scaffold
7486 * array of CONTENT_SCAFFOLD tree nodes into a new array of
7487 * XML_Content tree nodes followed by a gapless list of zero-terminated
7488 * strings. */
7489 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7490 XML_Content *ret;
7491 XML_Char *str; /* the current string writing location */
7493 /* Detect and prevent integer overflow.
7494 * The preprocessor guard addresses the "always false" warning
7495 * from -Wtype-limits on platforms where
7496 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7497 #if UINT_MAX >= SIZE_MAX
7498 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7499 return NULL;
7501 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7502 return NULL;
7504 #endif
7505 if (dtd->scaffCount * sizeof(XML_Content)
7506 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7507 return NULL;
7510 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7511 + (dtd->contentStringLen * sizeof(XML_Char)));
7513 ret = (XML_Content *)MALLOC(parser, allocsize);
7514 if (! ret)
7515 return NULL;
7517 /* What follows is an iterative implementation (of what was previously done
7518 * recursively in a dedicated function called "build_node". The old recursive
7519 * build_node could be forced into stack exhaustion from input as small as a
7520 * few megabyte, and so that was a security issue. Hence, a function call
7521 * stack is avoided now by resolving recursion.)
7523 * The iterative approach works as follows:
7525 * - We have two writing pointers, both walking up the result array; one does
7526 * the work, the other creates "jobs" for its colleague to do, and leads
7527 * the way:
7529 * - The faster one, pointer jobDest, always leads and writes "what job
7530 * to do" by the other, once they reach that place in the
7531 * array: leader "jobDest" stores the source node array index (relative
7532 * to array dtd->scaffold) in field "numchildren".
7534 * - The slower one, pointer dest, looks at the value stored in the
7535 * "numchildren" field (which actually holds a source node array index
7536 * at that time) and puts the real data from dtd->scaffold in.
7538 * - Before the loop starts, jobDest writes source array index 0
7539 * (where the root node is located) so that dest will have something to do
7540 * when it starts operation.
7542 * - Whenever nodes with children are encountered, jobDest appends
7543 * them as new jobs, in order. As a result, tree node siblings are
7544 * adjacent in the resulting array, for example:
7546 * [0] root, has two children
7547 * [1] first child of 0, has three children
7548 * [3] first child of 1, does not have children
7549 * [4] second child of 1, does not have children
7550 * [5] third child of 1, does not have children
7551 * [2] second child of 0, does not have children
7553 * Or (the same data) presented in flat array view:
7555 * [0] root, has two children
7557 * [1] first child of 0, has three children
7558 * [2] second child of 0, does not have children
7560 * [3] first child of 1, does not have children
7561 * [4] second child of 1, does not have children
7562 * [5] third child of 1, does not have children
7564 * - The algorithm repeats until all target array indices have been processed.
7566 XML_Content *dest = ret; /* tree node writing location, moves upwards */
7567 XML_Content *const destLimit = &ret[dtd->scaffCount];
7568 XML_Content *jobDest = ret; /* next free writing location in target array */
7569 str = (XML_Char *)&ret[dtd->scaffCount];
7571 /* Add the starting job, the root node (index 0) of the source tree */
7572 (jobDest++)->numchildren = 0;
7574 for (; dest < destLimit; dest++) {
7575 /* Retrieve source tree array index from job storage */
7576 const int src_node = (int)dest->numchildren;
7578 /* Convert item */
7579 dest->type = dtd->scaffold[src_node].type;
7580 dest->quant = dtd->scaffold[src_node].quant;
7581 if (dest->type == XML_CTYPE_NAME) {
7582 const XML_Char *src;
7583 dest->name = str;
7584 src = dtd->scaffold[src_node].name;
7585 for (;;) {
7586 *str++ = *src;
7587 if (! *src)
7588 break;
7589 src++;
7591 dest->numchildren = 0;
7592 dest->children = NULL;
7593 } else {
7594 unsigned int i;
7595 int cn;
7596 dest->name = NULL;
7597 dest->numchildren = dtd->scaffold[src_node].childcnt;
7598 dest->children = jobDest;
7600 /* Append scaffold indices of children to array */
7601 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7602 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
7603 (jobDest++)->numchildren = (unsigned int)cn;
7607 return ret;
7610 static ELEMENT_TYPE *
7611 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7612 const char *end) {
7613 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7614 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7615 ELEMENT_TYPE *ret;
7617 if (! name)
7618 return NULL;
7619 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7620 sizeof(ELEMENT_TYPE));
7621 if (! ret)
7622 return NULL;
7623 if (ret->name != name)
7624 poolDiscard(&dtd->pool);
7625 else {
7626 poolFinish(&dtd->pool);
7627 if (! setElementTypePrefix(parser, ret))
7628 return NULL;
7630 return ret;
7633 static XML_Char *
7634 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7635 size_t charsRequired = 0;
7636 XML_Char *result;
7638 /* First determine how long the string is */
7639 while (s[charsRequired] != 0) {
7640 charsRequired++;
7642 /* Include the terminator */
7643 charsRequired++;
7645 /* Now allocate space for the copy */
7646 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7647 if (result == NULL)
7648 return NULL;
7649 /* Copy the original into place */
7650 memcpy(result, s, charsRequired * sizeof(XML_Char));
7651 return result;
7654 #ifdef XML_DTD
7656 static float
7657 accountingGetCurrentAmplification(XML_Parser rootParser) {
7658 const XmlBigCount countBytesOutput
7659 = rootParser->m_accounting.countBytesDirect
7660 + rootParser->m_accounting.countBytesIndirect;
7661 const float amplificationFactor
7662 = rootParser->m_accounting.countBytesDirect
7663 ? (countBytesOutput
7664 / (float)(rootParser->m_accounting.countBytesDirect))
7665 : 1.0f;
7666 assert(! rootParser->m_parentParser);
7667 return amplificationFactor;
7670 static void
7671 accountingReportStats(XML_Parser originParser, const char *epilog) {
7672 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7673 assert(! rootParser->m_parentParser);
7675 if (rootParser->m_accounting.debugLevel < 1) {
7676 return;
7679 const float amplificationFactor
7680 = accountingGetCurrentAmplification(rootParser);
7681 fprintf(stderr,
7682 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7683 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7684 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7685 rootParser->m_accounting.countBytesIndirect,
7686 (double)amplificationFactor, epilog);
7689 static void
7690 accountingOnAbort(XML_Parser originParser) {
7691 accountingReportStats(originParser, " ABORTING\n");
7694 static void
7695 accountingReportDiff(XML_Parser rootParser,
7696 unsigned int levelsAwayFromRootParser, const char *before,
7697 const char *after, ptrdiff_t bytesMore, int source_line,
7698 enum XML_Account account) {
7699 assert(! rootParser->m_parentParser);
7701 fprintf(stderr,
7702 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7703 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7704 levelsAwayFromRootParser, source_line, 10, "");
7706 const char ellipis[] = "[..]";
7707 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7708 const unsigned int contextLength = 10;
7710 /* Note: Performance is of no concern here */
7711 const char *walker = before;
7712 if ((rootParser->m_accounting.debugLevel >= 3)
7713 || (after - before)
7714 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7715 for (; walker < after; walker++) {
7716 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7718 } else {
7719 for (; walker < before + contextLength; walker++) {
7720 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7722 fprintf(stderr, ellipis);
7723 walker = after - contextLength;
7724 for (; walker < after; walker++) {
7725 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7728 fprintf(stderr, "\"\n");
7731 static XML_Bool
7732 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7733 const char *after, int source_line,
7734 enum XML_Account account) {
7735 /* Note: We need to check the token type *first* to be sure that
7736 * we can even access variable <after>, safely.
7737 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7738 switch (tok) {
7739 case XML_TOK_INVALID:
7740 case XML_TOK_PARTIAL:
7741 case XML_TOK_PARTIAL_CHAR:
7742 case XML_TOK_NONE:
7743 return XML_TRUE;
7746 if (account == XML_ACCOUNT_NONE)
7747 return XML_TRUE; /* because these bytes have been accounted for, already */
7749 unsigned int levelsAwayFromRootParser;
7750 const XML_Parser rootParser
7751 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7752 assert(! rootParser->m_parentParser);
7754 const int isDirect
7755 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7756 const ptrdiff_t bytesMore = after - before;
7758 XmlBigCount *const additionTarget
7759 = isDirect ? &rootParser->m_accounting.countBytesDirect
7760 : &rootParser->m_accounting.countBytesIndirect;
7762 /* Detect and avoid integer overflow */
7763 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7764 return XML_FALSE;
7765 *additionTarget += bytesMore;
7767 const XmlBigCount countBytesOutput
7768 = rootParser->m_accounting.countBytesDirect
7769 + rootParser->m_accounting.countBytesIndirect;
7770 const float amplificationFactor
7771 = accountingGetCurrentAmplification(rootParser);
7772 const XML_Bool tolerated
7773 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7774 || (amplificationFactor
7775 <= rootParser->m_accounting.maximumAmplificationFactor);
7777 if (rootParser->m_accounting.debugLevel >= 2) {
7778 accountingReportStats(rootParser, "");
7779 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7780 bytesMore, source_line, account);
7783 return tolerated;
7786 unsigned long long
7787 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7788 if (! parser)
7789 return 0;
7790 return parser->m_accounting.countBytesDirect;
7793 unsigned long long
7794 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7795 if (! parser)
7796 return 0;
7797 return parser->m_accounting.countBytesIndirect;
7800 static void
7801 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7802 const char *action, int sourceLine) {
7803 assert(! rootParser->m_parentParser);
7804 if (rootParser->m_entity_stats.debugLevel < 1)
7805 return;
7807 # if defined(XML_UNICODE)
7808 const char *const entityName = "[..]";
7809 # else
7810 const char *const entityName = entity->name;
7811 # endif
7813 fprintf(
7814 stderr,
7815 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7816 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7817 rootParser->m_entity_stats.currentDepth,
7818 rootParser->m_entity_stats.maximumDepthSeen,
7819 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7820 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7821 sourceLine);
7824 static void
7825 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7826 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7827 assert(! rootParser->m_parentParser);
7829 rootParser->m_entity_stats.countEverOpened++;
7830 rootParser->m_entity_stats.currentDepth++;
7831 if (rootParser->m_entity_stats.currentDepth
7832 > rootParser->m_entity_stats.maximumDepthSeen) {
7833 rootParser->m_entity_stats.maximumDepthSeen++;
7836 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7839 static void
7840 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7841 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7842 assert(! rootParser->m_parentParser);
7844 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7845 rootParser->m_entity_stats.currentDepth--;
7848 static XML_Parser
7849 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7850 XML_Parser rootParser = parser;
7851 unsigned int stepsTakenUpwards = 0;
7852 while (rootParser->m_parentParser) {
7853 rootParser = rootParser->m_parentParser;
7854 stepsTakenUpwards++;
7856 assert(! rootParser->m_parentParser);
7857 if (outLevelDiff != NULL) {
7858 *outLevelDiff = stepsTakenUpwards;
7860 return rootParser;
/* Maps a byte to a static, NUL-terminated, printable representation of it,
 * for use in debug output:
 *  - 0 becomes "\0", tab "\t", line feed "\n" and carriage return "\r",
 *  - the double quote and the backslash are prefixed with a backslash,
 *  - all other bytes from 32 to 126 are returned as themselves,
 *  - every remaining byte becomes "\x" followed by its value in upper-case
 *    hexadecimal digits without leading zero (e.g. "\x1", "\x7F", "\xFF").
 * The returned string must not be modified or freed by the caller. */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* One entry per byte value, eight entries per row */
  static const char *const printable[] = {
      /*   0 */ "\\0",   "\\x1",  "\\x2",  "\\x3",  "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /*   8 */ "\\x8",  "\\t",   "\\n",   "\\xB",  "\\xC",  "\\r",   "\\xE",  "\\xF",
      /*  16 */ "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      /*  24 */ "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /*  32 */ " ",     "!",     "\\\"",  "#",     "$",     "%",     "&",     "'",
      /*  40 */ "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
      /*  48 */ "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
      /*  56 */ "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
      /*  64 */ "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
      /*  72 */ "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
      /*  80 */ "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
      /*  88 */ "X",     "Y",     "Z",     "[",     "\\\\",  "]",     "^",     "_",
      /*  96 */ "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
      /* 104 */ "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
      /* 112 */ "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
      /* 120 */ "x",     "y",     "z",     "{",     "|",     "}",     "~",     "\\x7F",
      /* 128 */ "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      /* 136 */ "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 144 */ "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      /* 152 */ "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 160 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 168 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 176 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 184 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 192 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 200 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 208 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 216 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 224 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 232 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 240 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 248 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const unsigned int entryCount
      = (unsigned int)(sizeof(printable) / sizeof(printable[0]));

  if ((unsigned int)c < entryCount) {
    return printable[c];
  }

  assert(0); /* never gets here */
  return "dead code";
}
8385 #endif /* XML_DTD */
/* Reads a debug level from the environment.
 *
 * Returns the value of environment variable <variableName> parsed as an
 * unsigned decimal number. <defaultDebugLevel> is returned instead if the
 * variable is not set, if its value is empty or contains no digits, if the
 * number is followed by any other characters, or if strtoul reports an
 * error (e.g. the value is out of range for unsigned long).
 *
 * Note: errno is reset to 0 before parsing, and again when parsing failed. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const valueOrNull = getenv(variableName);
  if (valueOrNull == NULL) {
    return defaultDebugLevel;
  }
  const char *const value = valueOrNull;

  errno = 0;
  char *afterValue = NULL;
  unsigned long debugLevel = strtoul(value, &afterValue, 10);
  /* Note: When no conversion could be performed, strtoul stores <value> in
   * <afterValue>. Without checking for that, an empty value would pass the
   * "nothing left over" test below and be misread as debug level 0. */
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0;
    return defaultDebugLevel;
  }

  return debugLevel;
}