Allow multiple volumes with the same name if their creation dates differ.
[AROS.git] / workbench / libs / expat / lib / xmlparse.c
blobc3cf1bb1af01a3af36c63c349138c8c949625e69
1 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
3 */
5 #include <stddef.h>
6 #include <string.h> /* memset(), memcpy() */
7 #include <assert.h>
8 #include <limits.h> /* UINT_MAX */
9 #include <time.h> /* time() */
11 #define XML_BUILDING_EXPAT 1
13 #ifdef COMPILED_FROM_DSP
14 #include "winconfig.h"
15 #elif defined(MACOS_CLASSIC)
16 #include "macconfig.h"
17 #elif defined(__amigaos__)
18 #include "amigaconfig.h"
19 #elif defined(__AROS__)
20 #include "arosconfig.h"
21 #undef assert
22 #define assert(x) ASSERT(x)
23 #elif defined(__WATCOMC__)
24 #include "watcomconfig.h"
25 #elif defined(HAVE_EXPAT_CONFIG_H)
26 #include <expat_config.h>
27 #endif /* ndef COMPILED_FROM_DSP */
29 #include "ascii.h"
30 #include "expat.h"
/* Bind the generic encoding names used in this file to the UTF-16 or
   UTF-8 variants, depending on whether XML_UNICODE is defined. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* True unless the encoding is UTF-16 and s starts at an even address.
   The address is converted to an integer to test its low bit; subtracting
   a null pointer, as was done before, is undefined behaviour. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || ((size_t)(s) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* True unless the encoding is already UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
/* Without namespace support the *NS entry points are aliases for the
   plain ones. */
#ifndef XML_NS

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif

/* XML_T() wraps a character constant and XML_L() a literal so that they
   match the configured character type. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) (const wchar_t)x
#define XML_L(x) L ## x
#else
#define XML_T(x) (const unsigned short)x
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
/* Round up n to be a multiple of sz, where sz is a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
82 /* Handle the case where memmove() doesn't exist. */
83 #ifndef HAVE_MEMMOVE
84 #ifdef HAVE_BCOPY
85 #define memmove(d,s,l) bcopy((s),(d),(l))
86 #else
87 #error memmove does not exist on this platform, nor is a substitute available
88 #endif /* HAVE_BCOPY */
89 #endif /* HAVE_MEMMOVE */
91 #include "internal.h"
92 #include "xmltok.h"
93 #include "xmlrole.h"
95 typedef const XML_Char *KEY;
97 typedef struct {
98 KEY name;
99 } NAMED;
101 typedef struct {
102 NAMED **v;
103 unsigned char power;
104 size_t size;
105 size_t used;
106 const XML_Memory_Handling_Suite *mem;
107 } HASH_TABLE;
/* Basic character hash algorithm, taken from Python's string hash:
   h = h * 1000003 ^ character, the constant being a prime number.
   (0xF4243 is 1000003 in hexadecimal.)  The character is narrowed to an
   unsigned type so that a negative char value does not sign-extend.
*/
#ifdef XML_UNICODE
#define CHAR_HASH(h, c) \
  (((h) * 0xF4243) ^ (unsigned short)(c))
#else
#define CHAR_HASH(h, c) \
  (((h) * 0xF4243) ^ (unsigned char)(c))
#endif
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
134 typedef struct {
135 NAMED **p;
136 NAMED **end;
137 } HASH_TABLE_ITER;
/* Initial sizes for the parser's buffers and tables. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
148 typedef struct binding {
149 struct prefix *prefix;
150 struct binding *nextTagBinding;
151 struct binding *prevPrefixBinding;
152 const struct attribute_id *attId;
153 XML_Char *uri;
154 int uriLen;
155 int uriAlloc;
156 } BINDING;
158 typedef struct prefix {
159 const XML_Char *name;
160 BINDING *binding;
161 } PREFIX;
163 typedef struct {
164 const XML_Char *str;
165 const XML_Char *localPart;
166 const XML_Char *prefix;
167 int strLen;
168 int uriLen;
169 int prefixLen;
170 } TAG_NAME;
172 /* TAG represents an open element.
173 The name of the element is stored in both the document and API
174 encodings. The memory buffer 'buf' is a separately-allocated
175 memory area which stores the name. During the XML_Parse()/
176 XMLParseBuffer() when the element is open, the memory for the 'raw'
177 version of the name (in the document encoding) is shared with the
178 document buffer. If the element is open across calls to
179 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
180 contain the 'raw' name as well.
182 A parser re-uses these structures, maintaining a list of allocated
183 TAG objects in a free list.
185 typedef struct tag {
186 struct tag *parent; /* parent of this element */
187 const char *rawName; /* tagName in the original encoding */
188 int rawNameLength;
189 TAG_NAME name; /* tagName in the API encoding */
190 char *buf; /* buffer for name components */
191 char *bufEnd; /* end of the buffer */
192 BINDING *bindings;
193 } TAG;
195 typedef struct {
196 const XML_Char *name;
197 const XML_Char *textPtr;
198 int textLen; /* length in XML_Chars */
199 int processed; /* # of processed bytes - when suspended */
200 const XML_Char *systemId;
201 const XML_Char *base;
202 const XML_Char *publicId;
203 const XML_Char *notation;
204 XML_Bool open;
205 XML_Bool is_param;
206 XML_Bool is_internal; /* true if declared in internal subset outside PE */
207 } ENTITY;
209 typedef struct {
210 enum XML_Content_Type type;
211 enum XML_Content_Quant quant;
212 const XML_Char * name;
213 int firstchild;
214 int lastchild;
215 int childcnt;
216 int nextsib;
217 } CONTENT_SCAFFOLD;
219 #define INIT_SCAFFOLD_ELEMENTS 32
221 typedef struct block {
222 struct block *next;
223 int size;
224 XML_Char s[1];
225 } BLOCK;
227 typedef struct {
228 BLOCK *blocks;
229 BLOCK *freeBlocks;
230 const XML_Char *end;
231 XML_Char *ptr;
232 XML_Char *start;
233 const XML_Memory_Handling_Suite *mem;
234 } STRING_POOL;
236 /* The XML_Char before the name is used to determine whether
237 an attribute has been specified. */
238 typedef struct attribute_id {
239 XML_Char *name;
240 PREFIX *prefix;
241 XML_Bool maybeTokenized;
242 XML_Bool xmlns;
243 } ATTRIBUTE_ID;
245 typedef struct {
246 const ATTRIBUTE_ID *id;
247 XML_Bool isCdata;
248 const XML_Char *value;
249 } DEFAULT_ATTRIBUTE;
251 typedef struct {
252 unsigned long version;
253 unsigned long hash;
254 const XML_Char *uriName;
255 } NS_ATT;
257 typedef struct {
258 const XML_Char *name;
259 PREFIX *prefix;
260 const ATTRIBUTE_ID *idAtt;
261 int nDefaultAtts;
262 int allocDefaultAtts;
263 DEFAULT_ATTRIBUTE *defaultAtts;
264 } ELEMENT_TYPE;
266 typedef struct {
267 HASH_TABLE generalEntities;
268 HASH_TABLE elementTypes;
269 HASH_TABLE attributeIds;
270 HASH_TABLE prefixes;
271 STRING_POOL pool;
272 STRING_POOL entityValuePool;
273 /* false once a parameter entity reference has been skipped */
274 XML_Bool keepProcessing;
275 /* true once an internal or external PE reference has been encountered;
276 this includes the reference to an external subset */
277 XML_Bool hasParamEntityRefs;
278 XML_Bool standalone;
279 #ifdef XML_DTD
280 /* indicates if external PE has been read */
281 XML_Bool paramEntityRead;
282 HASH_TABLE paramEntities;
283 #endif /* XML_DTD */
284 PREFIX defaultPrefix;
285 /* === scaffolding for building content model === */
286 XML_Bool in_eldecl;
287 CONTENT_SCAFFOLD *scaffold;
288 unsigned contentStringLen;
289 unsigned scaffSize;
290 unsigned scaffCount;
291 int scaffLevel;
292 int *scaffIndex;
293 } DTD;
295 typedef struct open_internal_entity {
296 const char *internalEventPtr;
297 const char *internalEventEndPtr;
298 struct open_internal_entity *next;
299 ENTITY *entity;
300 int startTagLevel;
301 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
302 } OPEN_INTERNAL_ENTITY;
304 typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
305 const char *start,
306 const char *end,
307 const char **endPtr);
309 static Processor prologProcessor;
310 static Processor prologInitProcessor;
311 static Processor contentProcessor;
312 static Processor cdataSectionProcessor;
313 #ifdef XML_DTD
314 static Processor ignoreSectionProcessor;
315 static Processor externalParEntProcessor;
316 static Processor externalParEntInitProcessor;
317 static Processor entityValueProcessor;
318 static Processor entityValueInitProcessor;
319 #endif /* XML_DTD */
320 static Processor epilogProcessor;
321 static Processor errorProcessor;
322 static Processor externalEntityInitProcessor;
323 static Processor externalEntityInitProcessor2;
324 static Processor externalEntityInitProcessor3;
325 static Processor externalEntityContentProcessor;
326 static Processor internalEntityProcessor;
328 static enum XML_Error
329 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
330 static enum XML_Error
331 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
332 const char *s, const char *next);
333 static enum XML_Error
334 initializeEncoding(XML_Parser parser);
335 static enum XML_Error
336 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
337 const char *end, int tok, const char *next, const char **nextPtr,
338 XML_Bool haveMore);
339 static enum XML_Error
340 processInternalEntity(XML_Parser parser, ENTITY *entity,
341 XML_Bool betweenDecl);
342 static enum XML_Error
343 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
344 const char *start, const char *end, const char **endPtr,
345 XML_Bool haveMore);
346 static enum XML_Error
347 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
348 const char *end, const char **nextPtr, XML_Bool haveMore);
349 #ifdef XML_DTD
350 static enum XML_Error
351 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
352 const char *end, const char **nextPtr, XML_Bool haveMore);
353 #endif /* XML_DTD */
355 static enum XML_Error
356 storeAtts(XML_Parser parser, const ENCODING *, const char *s,
357 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
358 static enum XML_Error
359 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
360 const XML_Char *uri, BINDING **bindingsPtr);
361 static int
362 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
363 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
364 static enum XML_Error
365 storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
366 const char *, const char *, STRING_POOL *);
367 static enum XML_Error
368 appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
369 const char *, const char *, STRING_POOL *);
370 static ATTRIBUTE_ID *
371 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
372 const char *end);
373 static int
374 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
375 static enum XML_Error
376 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
377 const char *end);
378 static int
379 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
380 const char *start, const char *end);
381 static int
382 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
383 const char *end);
384 static void
385 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
386 const char *end);
388 static const XML_Char * getContext(XML_Parser parser);
389 static XML_Bool
390 setContext(XML_Parser parser, const XML_Char *context);
392 static void FASTCALL normalizePublicId(XML_Char *s);
394 static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
395 /* do not call if parentParser != NULL */
396 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
397 static void
398 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
399 static int
400 dtdCopy(XML_Parser oldParser,
401 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
402 static int
403 copyEntityTable(XML_Parser oldParser,
404 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
405 static NAMED *
406 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
407 static void FASTCALL
408 hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
409 static void FASTCALL hashTableClear(HASH_TABLE *);
410 static void FASTCALL hashTableDestroy(HASH_TABLE *);
411 static void FASTCALL
412 hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
413 static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
415 static void FASTCALL
416 poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
417 static void FASTCALL poolClear(STRING_POOL *);
418 static void FASTCALL poolDestroy(STRING_POOL *);
419 static XML_Char *
420 poolAppend(STRING_POOL *pool, const ENCODING *enc,
421 const char *ptr, const char *end);
422 static XML_Char *
423 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
424 const char *ptr, const char *end);
425 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
426 static const XML_Char * FASTCALL
427 poolCopyString(STRING_POOL *pool, const XML_Char *s);
428 static const XML_Char *
429 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
430 static const XML_Char * FASTCALL
431 poolAppendString(STRING_POOL *pool, const XML_Char *s);
433 static int FASTCALL nextScaffoldPart(XML_Parser parser);
434 static XML_Content * build_model(XML_Parser parser);
435 static ELEMENT_TYPE *
436 getElementType(XML_Parser parser, const ENCODING *enc,
437 const char *ptr, const char *end);
439 static unsigned long generate_hash_secret_salt(void);
440 static XML_Bool startParsing(XML_Parser parser);
442 static XML_Parser
443 parserCreate(const XML_Char *encodingName,
444 const XML_Memory_Handling_Suite *memsuite,
445 const XML_Char *nameSep,
446 DTD *dtd);
448 static void
449 parserInit(XML_Parser parser, const XML_Char *encodingName);
/* Accessors for the string currently being built in a STRING_POOL.
   Every macro takes a pointer to the pool.  The argument is parenthesized
   throughout, so an expression such as &parser->m_tempPool is safe to
   pass. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Appends c, growing the pool first when it is full; yields 0 if growing
   fails and 1 otherwise. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = c), 1))
463 struct XML_ParserStruct {
464 /* The first member must be userData so that the XML_GetUserData
465 macro works. */
466 void *m_userData;
467 void *m_handlerArg;
468 char *m_buffer;
469 const XML_Memory_Handling_Suite m_mem;
470 /* first character to be parsed */
471 const char *m_bufferPtr;
472 /* past last character to be parsed */
473 char *m_bufferEnd;
474 /* allocated end of buffer */
475 const char *m_bufferLim;
476 XML_Index m_parseEndByteIndex;
477 const char *m_parseEndPtr;
478 XML_Char *m_dataBuf;
479 XML_Char *m_dataBufEnd;
480 XML_StartElementHandler m_startElementHandler;
481 XML_EndElementHandler m_endElementHandler;
482 XML_CharacterDataHandler m_characterDataHandler;
483 XML_ProcessingInstructionHandler m_processingInstructionHandler;
484 XML_CommentHandler m_commentHandler;
485 XML_StartCdataSectionHandler m_startCdataSectionHandler;
486 XML_EndCdataSectionHandler m_endCdataSectionHandler;
487 XML_DefaultHandler m_defaultHandler;
488 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
489 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
490 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
491 XML_NotationDeclHandler m_notationDeclHandler;
492 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
493 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
494 XML_NotStandaloneHandler m_notStandaloneHandler;
495 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
496 XML_Parser m_externalEntityRefHandlerArg;
497 XML_SkippedEntityHandler m_skippedEntityHandler;
498 XML_UnknownEncodingHandler m_unknownEncodingHandler;
499 XML_ElementDeclHandler m_elementDeclHandler;
500 XML_AttlistDeclHandler m_attlistDeclHandler;
501 XML_EntityDeclHandler m_entityDeclHandler;
502 XML_XmlDeclHandler m_xmlDeclHandler;
503 const ENCODING *m_encoding;
504 INIT_ENCODING m_initEncoding;
505 const ENCODING *m_internalEncoding;
506 const XML_Char *m_protocolEncodingName;
507 XML_Bool m_ns;
508 XML_Bool m_ns_triplets;
509 void *m_unknownEncodingMem;
510 void *m_unknownEncodingData;
511 void *m_unknownEncodingHandlerData;
512 void (XMLCALL *m_unknownEncodingRelease)(void *);
513 PROLOG_STATE m_prologState;
514 Processor *m_processor;
515 enum XML_Error m_errorCode;
516 const char *m_eventPtr;
517 const char *m_eventEndPtr;
518 const char *m_positionPtr;
519 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
520 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
521 XML_Bool m_defaultExpandInternalEntities;
522 int m_tagLevel;
523 ENTITY *m_declEntity;
524 const XML_Char *m_doctypeName;
525 const XML_Char *m_doctypeSysid;
526 const XML_Char *m_doctypePubid;
527 const XML_Char *m_declAttributeType;
528 const XML_Char *m_declNotationName;
529 const XML_Char *m_declNotationPublicId;
530 ELEMENT_TYPE *m_declElementType;
531 ATTRIBUTE_ID *m_declAttributeId;
532 XML_Bool m_declAttributeIsCdata;
533 XML_Bool m_declAttributeIsId;
534 DTD *m_dtd;
535 const XML_Char *m_curBase;
536 TAG *m_tagStack;
537 TAG *m_freeTagList;
538 BINDING *m_inheritedBindings;
539 BINDING *m_freeBindingList;
540 int m_attsSize;
541 int m_nSpecifiedAtts;
542 int m_idAttIndex;
543 ATTRIBUTE *m_atts;
544 NS_ATT *m_nsAtts;
545 unsigned long m_nsAttsVersion;
546 unsigned char m_nsAttsPower;
547 #ifdef XML_ATTR_INFO
548 XML_AttrInfo *m_attInfo;
549 #endif
550 POSITION m_position;
551 STRING_POOL m_tempPool;
552 STRING_POOL m_temp2Pool;
553 char *m_groupConnector;
554 unsigned int m_groupSize;
555 XML_Char m_namespaceSeparator;
556 XML_Parser m_parentParser;
557 XML_ParsingStatus m_parsingStatus;
558 #ifdef XML_DTD
559 XML_Bool m_isParamEntity;
560 XML_Bool m_useForeignDTD;
561 enum XML_ParamEntityParsing m_paramEntityParsing;
562 #endif
563 unsigned long m_hash_secret_salt;
/* Allocation through the parser's own memory suite.  These, and every
   accessor below, expect a variable named 'parser' to be in scope. */
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s)))
#define FREE(p) (parser->m_mem.free_fcn((p)))

/* Short names for the members of struct XML_ParserStruct.  The former
   internalEntityRefHandler alias was dropped: the struct has no
   m_internalEntityRefHandler member, so it could never be expanded. */
#define userData (parser->m_userData)
#define handlerArg (parser->m_handlerArg)
#define startElementHandler (parser->m_startElementHandler)
#define endElementHandler (parser->m_endElementHandler)
#define characterDataHandler (parser->m_characterDataHandler)
#define processingInstructionHandler \
        (parser->m_processingInstructionHandler)
#define commentHandler (parser->m_commentHandler)
#define startCdataSectionHandler \
        (parser->m_startCdataSectionHandler)
#define endCdataSectionHandler (parser->m_endCdataSectionHandler)
#define defaultHandler (parser->m_defaultHandler)
#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler \
        (parser->m_unparsedEntityDeclHandler)
#define notationDeclHandler (parser->m_notationDeclHandler)
#define startNamespaceDeclHandler \
        (parser->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
#define notStandaloneHandler (parser->m_notStandaloneHandler)
#define externalEntityRefHandler \
        (parser->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg \
        (parser->m_externalEntityRefHandlerArg)
#define skippedEntityHandler (parser->m_skippedEntityHandler)
#define unknownEncodingHandler (parser->m_unknownEncodingHandler)
#define elementDeclHandler (parser->m_elementDeclHandler)
#define attlistDeclHandler (parser->m_attlistDeclHandler)
#define entityDeclHandler (parser->m_entityDeclHandler)
#define xmlDeclHandler (parser->m_xmlDeclHandler)
#define encoding (parser->m_encoding)
#define initEncoding (parser->m_initEncoding)
#define internalEncoding (parser->m_internalEncoding)
#define unknownEncodingMem (parser->m_unknownEncodingMem)
#define unknownEncodingData (parser->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (parser->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (parser->m_unknownEncodingRelease)
#define protocolEncodingName (parser->m_protocolEncodingName)
#define ns (parser->m_ns)
#define ns_triplets (parser->m_ns_triplets)
#define prologState (parser->m_prologState)
#define processor (parser->m_processor)
#define errorCode (parser->m_errorCode)
#define eventPtr (parser->m_eventPtr)
#define eventEndPtr (parser->m_eventEndPtr)
#define positionPtr (parser->m_positionPtr)
#define position (parser->m_position)
#define openInternalEntities (parser->m_openInternalEntities)
#define freeInternalEntities (parser->m_freeInternalEntities)
#define defaultExpandInternalEntities \
        (parser->m_defaultExpandInternalEntities)
#define tagLevel (parser->m_tagLevel)
#define buffer (parser->m_buffer)
#define bufferPtr (parser->m_bufferPtr)
#define bufferEnd (parser->m_bufferEnd)
#define parseEndByteIndex (parser->m_parseEndByteIndex)
#define parseEndPtr (parser->m_parseEndPtr)
#define bufferLim (parser->m_bufferLim)
#define dataBuf (parser->m_dataBuf)
#define dataBufEnd (parser->m_dataBufEnd)
#define _dtd (parser->m_dtd)
#define curBase (parser->m_curBase)
#define declEntity (parser->m_declEntity)
#define doctypeName (parser->m_doctypeName)
#define doctypeSysid (parser->m_doctypeSysid)
#define doctypePubid (parser->m_doctypePubid)
#define declAttributeType (parser->m_declAttributeType)
#define declNotationName (parser->m_declNotationName)
#define declNotationPublicId (parser->m_declNotationPublicId)
#define declElementType (parser->m_declElementType)
#define declAttributeId (parser->m_declAttributeId)
#define declAttributeIsCdata (parser->m_declAttributeIsCdata)
#define declAttributeIsId (parser->m_declAttributeIsId)
#define freeTagList (parser->m_freeTagList)
#define freeBindingList (parser->m_freeBindingList)
#define inheritedBindings (parser->m_inheritedBindings)
#define tagStack (parser->m_tagStack)
#define atts (parser->m_atts)
#define attsSize (parser->m_attsSize)
#define nSpecifiedAtts (parser->m_nSpecifiedAtts)
#define idAttIndex (parser->m_idAttIndex)
#define nsAtts (parser->m_nsAtts)
#define nsAttsVersion (parser->m_nsAttsVersion)
#define nsAttsPower (parser->m_nsAttsPower)
#define attInfo (parser->m_attInfo)
#define tempPool (parser->m_tempPool)
#define temp2Pool (parser->m_temp2Pool)
#define groupConnector (parser->m_groupConnector)
#define groupSize (parser->m_groupSize)
#define namespaceSeparator (parser->m_namespaceSeparator)
#define parentParser (parser->m_parentParser)
#define ps_parsing (parser->m_parsingStatus.parsing)
#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
#ifdef XML_DTD
#define isParamEntity (parser->m_isParamEntity)
#define useForeignDTD (parser->m_useForeignDTD)
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)
674 XML_Parser XMLCALL
675 XML_ParserCreate(const XML_Char *encodingName)
677 return XML_ParserCreate_MM(encodingName, NULL, NULL);
680 XML_Parser XMLCALL
681 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
683 XML_Char tmp[2];
684 *tmp = nsSep;
685 return XML_ParserCreate_MM(encodingName, NULL, tmp);
688 static const XML_Char implicitContext[] = {
689 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
690 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
691 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
692 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
693 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
694 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
/* Produces the salt for the hash functions: seeds the C library generator
   from the current time and returns one rand() value.

   NOTE: srand() replaces the process-wide rand() state, so an application
   that seeded rand() itself loses its sequence.  The result can also be 0,
   which startParsing() treats as "no salt chosen yet". */
static unsigned long
generate_hash_secret_salt(void)
{
  unsigned int seed = (unsigned int)(time(NULL) % UINT_MAX);
  srand(seed);
  return (unsigned long)rand();
}
705 static XML_Bool /* only valid for root parser */
706 startParsing(XML_Parser parser)
708 /* hash functions must be initialized before setContext() is called */
709 if (hash_secret_salt == 0)
710 hash_secret_salt = generate_hash_secret_salt();
711 if (ns) {
712 /* implicit context only set for root parser, since child
713 parsers (i.e. external entity parsers) will inherit it
715 return setContext(parser, implicitContext);
717 return XML_TRUE;
720 XML_Parser XMLCALL
721 XML_ParserCreate_MM(const XML_Char *encodingName,
722 const XML_Memory_Handling_Suite *memsuite,
723 const XML_Char *nameSep)
725 return parserCreate(encodingName, memsuite, nameSep, NULL);
728 static XML_Parser
729 parserCreate(const XML_Char *encodingName,
730 const XML_Memory_Handling_Suite *memsuite,
731 const XML_Char *nameSep,
732 DTD *dtd)
734 XML_Parser parser;
736 if (memsuite) {
737 XML_Memory_Handling_Suite *mtemp;
738 parser = (XML_Parser)
739 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
740 if (parser != NULL) {
741 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
742 mtemp->malloc_fcn = memsuite->malloc_fcn;
743 mtemp->realloc_fcn = memsuite->realloc_fcn;
744 mtemp->free_fcn = memsuite->free_fcn;
747 else {
748 XML_Memory_Handling_Suite *mtemp;
749 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
750 if (parser != NULL) {
751 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
752 mtemp->malloc_fcn = malloc;
753 mtemp->realloc_fcn = realloc;
754 mtemp->free_fcn = free;
758 if (!parser)
759 return parser;
761 buffer = NULL;
762 bufferLim = NULL;
764 attsSize = INIT_ATTS_SIZE;
765 atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
766 if (atts == NULL) {
767 FREE(parser);
768 return NULL;
770 #ifdef XML_ATTR_INFO
771 attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
772 if (attInfo == NULL) {
773 FREE(atts);
774 FREE(parser);
775 return NULL;
777 #endif
778 dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
779 if (dataBuf == NULL) {
780 FREE(atts);
781 #ifdef XML_ATTR_INFO
782 FREE(attInfo);
783 #endif
784 FREE(parser);
785 return NULL;
787 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
789 if (dtd)
790 _dtd = dtd;
791 else {
792 _dtd = dtdCreate(&parser->m_mem);
793 if (_dtd == NULL) {
794 FREE(dataBuf);
795 FREE(atts);
796 #ifdef XML_ATTR_INFO
797 FREE(attInfo);
798 #endif
799 FREE(parser);
800 return NULL;
804 freeBindingList = NULL;
805 freeTagList = NULL;
806 freeInternalEntities = NULL;
808 groupSize = 0;
809 groupConnector = NULL;
811 unknownEncodingHandler = NULL;
812 unknownEncodingHandlerData = NULL;
814 namespaceSeparator = ASCII_EXCL;
815 ns = XML_FALSE;
816 ns_triplets = XML_FALSE;
818 nsAtts = NULL;
819 nsAttsVersion = 0;
820 nsAttsPower = 0;
822 poolInit(&tempPool, &(parser->m_mem));
823 poolInit(&temp2Pool, &(parser->m_mem));
824 parserInit(parser, encodingName);
826 if (encodingName && !protocolEncodingName) {
827 XML_ParserFree(parser);
828 return NULL;
831 if (nameSep) {
832 ns = XML_TRUE;
833 internalEncoding = XmlGetInternalEncodingNS();
834 namespaceSeparator = *nameSep;
836 else {
837 internalEncoding = XmlGetInternalEncoding();
840 return parser;
843 static void
844 parserInit(XML_Parser parser, const XML_Char *encodingName)
846 processor = prologInitProcessor;
847 XmlPrologStateInit(&prologState);
848 protocolEncodingName = (encodingName != NULL
849 ? poolCopyString(&tempPool, encodingName)
850 : NULL);
851 curBase = NULL;
852 XmlInitEncoding(&initEncoding, &encoding, 0);
853 userData = NULL;
854 handlerArg = NULL;
855 startElementHandler = NULL;
856 endElementHandler = NULL;
857 characterDataHandler = NULL;
858 processingInstructionHandler = NULL;
859 commentHandler = NULL;
860 startCdataSectionHandler = NULL;
861 endCdataSectionHandler = NULL;
862 defaultHandler = NULL;
863 startDoctypeDeclHandler = NULL;
864 endDoctypeDeclHandler = NULL;
865 unparsedEntityDeclHandler = NULL;
866 notationDeclHandler = NULL;
867 startNamespaceDeclHandler = NULL;
868 endNamespaceDeclHandler = NULL;
869 notStandaloneHandler = NULL;
870 externalEntityRefHandler = NULL;
871 externalEntityRefHandlerArg = parser;
872 skippedEntityHandler = NULL;
873 elementDeclHandler = NULL;
874 attlistDeclHandler = NULL;
875 entityDeclHandler = NULL;
876 xmlDeclHandler = NULL;
877 bufferPtr = buffer;
878 bufferEnd = buffer;
879 parseEndByteIndex = 0;
880 parseEndPtr = NULL;
881 declElementType = NULL;
882 declAttributeId = NULL;
883 declEntity = NULL;
884 doctypeName = NULL;
885 doctypeSysid = NULL;
886 doctypePubid = NULL;
887 declAttributeType = NULL;
888 declNotationName = NULL;
889 declNotationPublicId = NULL;
890 declAttributeIsCdata = XML_FALSE;
891 declAttributeIsId = XML_FALSE;
892 memset(&position, 0, sizeof(POSITION));
893 errorCode = XML_ERROR_NONE;
894 eventPtr = NULL;
895 eventEndPtr = NULL;
896 positionPtr = NULL;
897 openInternalEntities = NULL;
898 defaultExpandInternalEntities = XML_TRUE;
899 tagLevel = 0;
900 tagStack = NULL;
901 inheritedBindings = NULL;
902 nSpecifiedAtts = 0;
903 unknownEncodingMem = NULL;
904 unknownEncodingRelease = NULL;
905 unknownEncodingData = NULL;
906 parentParser = NULL;
907 ps_parsing = XML_INITIALIZED;
908 #ifdef XML_DTD
909 isParamEntity = XML_FALSE;
910 useForeignDTD = XML_FALSE;
911 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
912 #endif
913 hash_secret_salt = 0;
916 /* moves list of bindings to freeBindingList */
917 static void FASTCALL
918 moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
920 while (bindings) {
921 BINDING *b = bindings;
922 bindings = bindings->nextTagBinding;
923 b->nextTagBinding = freeBindingList;
924 freeBindingList = b;
928 XML_Bool XMLCALL
929 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
931 TAG *tStk;
932 OPEN_INTERNAL_ENTITY *openEntityList;
933 if (parentParser)
934 return XML_FALSE;
935 /* move tagStack to freeTagList */
936 tStk = tagStack;
937 while (tStk) {
938 TAG *tag = tStk;
939 tStk = tStk->parent;
940 tag->parent = freeTagList;
941 moveToFreeBindingList(parser, tag->bindings);
942 tag->bindings = NULL;
943 freeTagList = tag;
945 /* move openInternalEntities to freeInternalEntities */
946 openEntityList = openInternalEntities;
947 while (openEntityList) {
948 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
949 openEntityList = openEntity->next;
950 openEntity->next = freeInternalEntities;
951 freeInternalEntities = openEntity;
953 moveToFreeBindingList(parser, inheritedBindings);
954 FREE(unknownEncodingMem);
955 if (unknownEncodingRelease)
956 unknownEncodingRelease(unknownEncodingData);
957 poolClear(&tempPool);
958 poolClear(&temp2Pool);
959 parserInit(parser, encodingName);
960 dtdReset(_dtd, &parser->m_mem);
961 return XML_TRUE;
964 enum XML_Status XMLCALL
965 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
967 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
968 XXX There's no way for the caller to determine which of the
969 XXX possible error cases caused the XML_STATUS_ERROR return.
971 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
972 return XML_STATUS_ERROR;
973 if (encodingName == NULL)
974 protocolEncodingName = NULL;
975 else {
976 protocolEncodingName = poolCopyString(&tempPool, encodingName);
977 if (!protocolEncodingName)
978 return XML_STATUS_ERROR;
980 return XML_STATUS_OK;
983 XML_Parser XMLCALL
984 XML_ExternalEntityParserCreate(XML_Parser oldParser,
985 const XML_Char *context,
986 const XML_Char *encodingName)
988 XML_Parser parser = oldParser;
989 DTD *newDtd = NULL;
990 DTD *oldDtd = _dtd;
991 XML_StartElementHandler oldStartElementHandler = startElementHandler;
992 XML_EndElementHandler oldEndElementHandler = endElementHandler;
993 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
994 XML_ProcessingInstructionHandler oldProcessingInstructionHandler
995 = processingInstructionHandler;
996 XML_CommentHandler oldCommentHandler = commentHandler;
997 XML_StartCdataSectionHandler oldStartCdataSectionHandler
998 = startCdataSectionHandler;
999 XML_EndCdataSectionHandler oldEndCdataSectionHandler
1000 = endCdataSectionHandler;
1001 XML_DefaultHandler oldDefaultHandler = defaultHandler;
1002 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler
1003 = unparsedEntityDeclHandler;
1004 XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler;
1005 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler
1006 = startNamespaceDeclHandler;
1007 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler
1008 = endNamespaceDeclHandler;
1009 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
1010 XML_ExternalEntityRefHandler oldExternalEntityRefHandler
1011 = externalEntityRefHandler;
1012 XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler;
1013 XML_UnknownEncodingHandler oldUnknownEncodingHandler
1014 = unknownEncodingHandler;
1015 XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler;
1016 XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler;
1017 XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler;
1018 XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler;
1019 ELEMENT_TYPE * oldDeclElementType = declElementType;
1021 void *oldUserData = userData;
1022 void *oldHandlerArg = handlerArg;
1023 XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
1024 XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1025 #ifdef XML_DTD
1026 enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing;
1027 int oldInEntityValue = prologState.inEntityValue;
1028 #endif
1029 XML_Bool oldns_triplets = ns_triplets;
1030 /* Note that the new parser shares the same hash secret as the old
1031 parser, so that dtdCopy and copyEntityTable can lookup values
1032 from hash tables associated with either parser without us having
1033 to worry which hash secrets each table has.
1035 unsigned long oldhash_secret_salt = hash_secret_salt;
1037 #ifdef XML_DTD
1038 if (!context)
1039 newDtd = oldDtd;
1040 #endif /* XML_DTD */
1042 /* Note that the magical uses of the pre-processor to make field
1043 access look more like C++ require that `parser' be overwritten
1044 here. This makes this function more painful to follow than it
1045 would be otherwise.
1047 if (ns) {
1048 XML_Char tmp[2];
1049 *tmp = namespaceSeparator;
1050 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1052 else {
1053 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1056 if (!parser)
1057 return NULL;
1059 startElementHandler = oldStartElementHandler;
1060 endElementHandler = oldEndElementHandler;
1061 characterDataHandler = oldCharacterDataHandler;
1062 processingInstructionHandler = oldProcessingInstructionHandler;
1063 commentHandler = oldCommentHandler;
1064 startCdataSectionHandler = oldStartCdataSectionHandler;
1065 endCdataSectionHandler = oldEndCdataSectionHandler;
1066 defaultHandler = oldDefaultHandler;
1067 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1068 notationDeclHandler = oldNotationDeclHandler;
1069 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1070 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1071 notStandaloneHandler = oldNotStandaloneHandler;
1072 externalEntityRefHandler = oldExternalEntityRefHandler;
1073 skippedEntityHandler = oldSkippedEntityHandler;
1074 unknownEncodingHandler = oldUnknownEncodingHandler;
1075 elementDeclHandler = oldElementDeclHandler;
1076 attlistDeclHandler = oldAttlistDeclHandler;
1077 entityDeclHandler = oldEntityDeclHandler;
1078 xmlDeclHandler = oldXmlDeclHandler;
1079 declElementType = oldDeclElementType;
1080 userData = oldUserData;
1081 if (oldUserData == oldHandlerArg)
1082 handlerArg = userData;
1083 else
1084 handlerArg = parser;
1085 if (oldExternalEntityRefHandlerArg != oldParser)
1086 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1087 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1088 ns_triplets = oldns_triplets;
1089 hash_secret_salt = oldhash_secret_salt;
1090 parentParser = oldParser;
1091 #ifdef XML_DTD
1092 paramEntityParsing = oldParamEntityParsing;
1093 prologState.inEntityValue = oldInEntityValue;
1094 if (context) {
1095 #endif /* XML_DTD */
1096 if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem)
1097 || !setContext(parser, context)) {
1098 XML_ParserFree(parser);
1099 return NULL;
1101 processor = externalEntityInitProcessor;
1102 #ifdef XML_DTD
1104 else {
1105 /* The DTD instance referenced by _dtd is shared between the document's
1106 root parser and external PE parsers, therefore one does not need to
1107 call setContext. In addition, one also *must* not call setContext,
1108 because this would overwrite existing prefix->binding pointers in
1109 _dtd with ones that get destroyed with the external PE parser.
1110 This would leave those prefixes with dangling pointers.
1112 isParamEntity = XML_TRUE;
1113 XmlPrologStateInitExternalEntity(&prologState);
1114 processor = externalParEntInitProcessor;
1116 #endif /* XML_DTD */
1117 return parser;
1120 static void FASTCALL
1121 destroyBindings(BINDING *bindings, XML_Parser parser)
1123 for (;;) {
1124 BINDING *b = bindings;
1125 if (!b)
1126 break;
1127 bindings = b->nextTagBinding;
1128 FREE(b->uri);
1129 FREE(b);
1133 void XMLCALL
1134 XML_ParserFree(XML_Parser parser)
1136 TAG *tagList;
1137 OPEN_INTERNAL_ENTITY *entityList;
1138 if (parser == NULL)
1139 return;
1140 /* free tagStack and freeTagList */
1141 tagList = tagStack;
1142 for (;;) {
1143 TAG *p;
1144 if (tagList == NULL) {
1145 if (freeTagList == NULL)
1146 break;
1147 tagList = freeTagList;
1148 freeTagList = NULL;
1150 p = tagList;
1151 tagList = tagList->parent;
1152 FREE(p->buf);
1153 destroyBindings(p->bindings, parser);
1154 FREE(p);
1156 /* free openInternalEntities and freeInternalEntities */
1157 entityList = openInternalEntities;
1158 for (;;) {
1159 OPEN_INTERNAL_ENTITY *openEntity;
1160 if (entityList == NULL) {
1161 if (freeInternalEntities == NULL)
1162 break;
1163 entityList = freeInternalEntities;
1164 freeInternalEntities = NULL;
1166 openEntity = entityList;
1167 entityList = entityList->next;
1168 FREE(openEntity);
1171 destroyBindings(freeBindingList, parser);
1172 destroyBindings(inheritedBindings, parser);
1173 poolDestroy(&tempPool);
1174 poolDestroy(&temp2Pool);
1175 #ifdef XML_DTD
1176 /* external parameter entity parsers share the DTD structure
1177 parser->m_dtd with the root parser, so we must not destroy it
1179 if (!isParamEntity && _dtd)
1180 #else
1181 if (_dtd)
1182 #endif /* XML_DTD */
1183 dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
1184 FREE((void *)atts);
1185 #ifdef XML_ATTR_INFO
1186 FREE((void *)attInfo);
1187 #endif
1188 FREE(groupConnector);
1189 FREE(buffer);
1190 FREE(dataBuf);
1191 FREE(nsAtts);
1192 FREE(unknownEncodingMem);
1193 if (unknownEncodingRelease)
1194 unknownEncodingRelease(unknownEncodingData);
1195 FREE(parser);
1198 void XMLCALL
1199 XML_UseParserAsHandlerArg(XML_Parser parser)
1201 handlerArg = parser;
1204 enum XML_Error XMLCALL
1205 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1207 #ifdef XML_DTD
1208 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1209 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1210 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1211 useForeignDTD = useDTD;
1212 return XML_ERROR_NONE;
1213 #else
1214 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1215 #endif
1218 void XMLCALL
1219 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1221 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1222 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1223 return;
1224 ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1227 void XMLCALL
1228 XML_SetUserData(XML_Parser parser, void *p)
1230 if (handlerArg == userData)
1231 handlerArg = userData = p;
1232 else
1233 userData = p;
1236 enum XML_Status XMLCALL
1237 XML_SetBase(XML_Parser parser, const XML_Char *p)
1239 if (p) {
1240 p = poolCopyString(&_dtd->pool, p);
1241 if (!p)
1242 return XML_STATUS_ERROR;
1243 curBase = p;
1245 else
1246 curBase = NULL;
1247 return XML_STATUS_OK;
1250 const XML_Char * XMLCALL
1251 XML_GetBase(XML_Parser parser)
1253 return curBase;
1256 int XMLCALL
1257 XML_GetSpecifiedAttributeCount(XML_Parser parser)
1259 return nSpecifiedAtts;
1262 int XMLCALL
1263 XML_GetIdAttributeIndex(XML_Parser parser)
1265 return idAttIndex;
#ifdef XML_ATTR_INFO
/* Returns the parser's attInfo array (XML_ATTR_INFO builds only). */
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  return attInfo;
}
#endif
1276 void XMLCALL
1277 XML_SetElementHandler(XML_Parser parser,
1278 XML_StartElementHandler start,
1279 XML_EndElementHandler end)
1281 startElementHandler = start;
1282 endElementHandler = end;
1285 void XMLCALL
1286 XML_SetStartElementHandler(XML_Parser parser,
1287 XML_StartElementHandler start) {
1288 startElementHandler = start;
1291 void XMLCALL
1292 XML_SetEndElementHandler(XML_Parser parser,
1293 XML_EndElementHandler end) {
1294 endElementHandler = end;
1297 void XMLCALL
1298 XML_SetCharacterDataHandler(XML_Parser parser,
1299 XML_CharacterDataHandler handler)
1301 characterDataHandler = handler;
1304 void XMLCALL
1305 XML_SetProcessingInstructionHandler(XML_Parser parser,
1306 XML_ProcessingInstructionHandler handler)
1308 processingInstructionHandler = handler;
1311 void XMLCALL
1312 XML_SetCommentHandler(XML_Parser parser,
1313 XML_CommentHandler handler)
1315 commentHandler = handler;
1318 void XMLCALL
1319 XML_SetCdataSectionHandler(XML_Parser parser,
1320 XML_StartCdataSectionHandler start,
1321 XML_EndCdataSectionHandler end)
1323 startCdataSectionHandler = start;
1324 endCdataSectionHandler = end;
1327 void XMLCALL
1328 XML_SetStartCdataSectionHandler(XML_Parser parser,
1329 XML_StartCdataSectionHandler start) {
1330 startCdataSectionHandler = start;
1333 void XMLCALL
1334 XML_SetEndCdataSectionHandler(XML_Parser parser,
1335 XML_EndCdataSectionHandler end) {
1336 endCdataSectionHandler = end;
1339 void XMLCALL
1340 XML_SetDefaultHandler(XML_Parser parser,
1341 XML_DefaultHandler handler)
1343 defaultHandler = handler;
1344 defaultExpandInternalEntities = XML_FALSE;
1347 void XMLCALL
1348 XML_SetDefaultHandlerExpand(XML_Parser parser,
1349 XML_DefaultHandler handler)
1351 defaultHandler = handler;
1352 defaultExpandInternalEntities = XML_TRUE;
1355 void XMLCALL
1356 XML_SetDoctypeDeclHandler(XML_Parser parser,
1357 XML_StartDoctypeDeclHandler start,
1358 XML_EndDoctypeDeclHandler end)
1360 startDoctypeDeclHandler = start;
1361 endDoctypeDeclHandler = end;
1364 void XMLCALL
1365 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1366 XML_StartDoctypeDeclHandler start) {
1367 startDoctypeDeclHandler = start;
1370 void XMLCALL
1371 XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1372 XML_EndDoctypeDeclHandler end) {
1373 endDoctypeDeclHandler = end;
1376 void XMLCALL
1377 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1378 XML_UnparsedEntityDeclHandler handler)
1380 unparsedEntityDeclHandler = handler;
1383 void XMLCALL
1384 XML_SetNotationDeclHandler(XML_Parser parser,
1385 XML_NotationDeclHandler handler)
1387 notationDeclHandler = handler;
1390 void XMLCALL
1391 XML_SetNamespaceDeclHandler(XML_Parser parser,
1392 XML_StartNamespaceDeclHandler start,
1393 XML_EndNamespaceDeclHandler end)
1395 startNamespaceDeclHandler = start;
1396 endNamespaceDeclHandler = end;
1399 void XMLCALL
1400 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1401 XML_StartNamespaceDeclHandler start) {
1402 startNamespaceDeclHandler = start;
1405 void XMLCALL
1406 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1407 XML_EndNamespaceDeclHandler end) {
1408 endNamespaceDeclHandler = end;
1411 void XMLCALL
1412 XML_SetNotStandaloneHandler(XML_Parser parser,
1413 XML_NotStandaloneHandler handler)
1415 notStandaloneHandler = handler;
1418 void XMLCALL
1419 XML_SetExternalEntityRefHandler(XML_Parser parser,
1420 XML_ExternalEntityRefHandler handler)
1422 externalEntityRefHandler = handler;
1425 void XMLCALL
1426 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
1428 if (arg)
1429 externalEntityRefHandlerArg = (XML_Parser)arg;
1430 else
1431 externalEntityRefHandlerArg = parser;
1434 void XMLCALL
1435 XML_SetSkippedEntityHandler(XML_Parser parser,
1436 XML_SkippedEntityHandler handler)
1438 skippedEntityHandler = handler;
1441 void XMLCALL
1442 XML_SetUnknownEncodingHandler(XML_Parser parser,
1443 XML_UnknownEncodingHandler handler,
1444 void *data)
1446 unknownEncodingHandler = handler;
1447 unknownEncodingHandlerData = data;
1450 void XMLCALL
1451 XML_SetElementDeclHandler(XML_Parser parser,
1452 XML_ElementDeclHandler eldecl)
1454 elementDeclHandler = eldecl;
1457 void XMLCALL
1458 XML_SetAttlistDeclHandler(XML_Parser parser,
1459 XML_AttlistDeclHandler attdecl)
1461 attlistDeclHandler = attdecl;
1464 void XMLCALL
1465 XML_SetEntityDeclHandler(XML_Parser parser,
1466 XML_EntityDeclHandler handler)
1468 entityDeclHandler = handler;
1471 void XMLCALL
1472 XML_SetXmlDeclHandler(XML_Parser parser,
1473 XML_XmlDeclHandler handler) {
1474 xmlDeclHandler = handler;
1477 int XMLCALL
1478 XML_SetParamEntityParsing(XML_Parser parser,
1479 enum XML_ParamEntityParsing peParsing)
1481 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1482 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1483 return 0;
1484 #ifdef XML_DTD
1485 paramEntityParsing = peParsing;
1486 return 1;
1487 #else
1488 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1489 #endif
1492 int XMLCALL
1493 XML_SetHashSalt(XML_Parser parser,
1494 unsigned long hash_salt)
1496 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1497 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1498 return 0;
1499 hash_secret_salt = hash_salt;
1500 return 1;
1503 enum XML_Status XMLCALL
1504 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
1506 switch (ps_parsing) {
1507 case XML_SUSPENDED:
1508 errorCode = XML_ERROR_SUSPENDED;
1509 return XML_STATUS_ERROR;
1510 case XML_FINISHED:
1511 errorCode = XML_ERROR_FINISHED;
1512 return XML_STATUS_ERROR;
1513 case XML_INITIALIZED:
1514 if (parentParser == NULL && !startParsing(parser)) {
1515 errorCode = XML_ERROR_NO_MEMORY;
1516 return XML_STATUS_ERROR;
1518 default:
1519 ps_parsing = XML_PARSING;
1522 if (len == 0) {
1523 ps_finalBuffer = (XML_Bool)isFinal;
1524 if (!isFinal)
1525 return XML_STATUS_OK;
1526 positionPtr = bufferPtr;
1527 parseEndPtr = bufferEnd;
1529 /* If data are left over from last buffer, and we now know that these
1530 data are the final chunk of input, then we have to check them again
1531 to detect errors based on that fact.
1533 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1535 if (errorCode == XML_ERROR_NONE) {
1536 switch (ps_parsing) {
1537 case XML_SUSPENDED:
1538 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1539 positionPtr = bufferPtr;
1540 return XML_STATUS_SUSPENDED;
1541 case XML_INITIALIZED:
1542 case XML_PARSING:
1543 ps_parsing = XML_FINISHED;
1544 /* fall through */
1545 default:
1546 return XML_STATUS_OK;
1549 eventEndPtr = eventPtr;
1550 processor = errorProcessor;
1551 return XML_STATUS_ERROR;
1553 #ifndef XML_CONTEXT_BYTES
1554 else if (bufferPtr == bufferEnd) {
1555 const char *end;
1556 int nLeftOver;
1557 enum XML_Error result;
1558 parseEndByteIndex += len;
1559 positionPtr = s;
1560 ps_finalBuffer = (XML_Bool)isFinal;
1562 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
1564 if (errorCode != XML_ERROR_NONE) {
1565 eventEndPtr = eventPtr;
1566 processor = errorProcessor;
1567 return XML_STATUS_ERROR;
1569 else {
1570 switch (ps_parsing) {
1571 case XML_SUSPENDED:
1572 result = XML_STATUS_SUSPENDED;
1573 break;
1574 case XML_INITIALIZED:
1575 case XML_PARSING:
1576 if (isFinal) {
1577 ps_parsing = XML_FINISHED;
1578 return XML_STATUS_OK;
1580 /* fall through */
1581 default:
1582 result = XML_STATUS_OK;
1586 XmlUpdatePosition(encoding, positionPtr, end, &position);
1587 nLeftOver = s + len - end;
1588 if (nLeftOver) {
1589 if (buffer == NULL || nLeftOver > bufferLim - buffer) {
1590 /* FIXME avoid integer overflow */
1591 char *temp;
1592 temp = (buffer == NULL
1593 ? (char *)MALLOC(len * 2)
1594 : (char *)REALLOC(buffer, len * 2));
1595 if (temp == NULL) {
1596 errorCode = XML_ERROR_NO_MEMORY;
1597 eventPtr = eventEndPtr = NULL;
1598 processor = errorProcessor;
1599 return XML_STATUS_ERROR;
1601 buffer = temp;
1602 bufferLim = buffer + len * 2;
1604 memcpy(buffer, end, nLeftOver);
1606 bufferPtr = buffer;
1607 bufferEnd = buffer + nLeftOver;
1608 positionPtr = bufferPtr;
1609 parseEndPtr = bufferEnd;
1610 eventPtr = bufferPtr;
1611 eventEndPtr = bufferPtr;
1612 return result;
1614 #endif /* not defined XML_CONTEXT_BYTES */
1615 else {
1616 void *buff = XML_GetBuffer(parser, len);
1617 if (buff == NULL)
1618 return XML_STATUS_ERROR;
1619 else {
1620 memcpy(buff, s, len);
1621 return XML_ParseBuffer(parser, len, isFinal);
1626 enum XML_Status XMLCALL
1627 XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
1629 const char *start;
1630 enum XML_Status result = XML_STATUS_OK;
1632 switch (ps_parsing) {
1633 case XML_SUSPENDED:
1634 errorCode = XML_ERROR_SUSPENDED;
1635 return XML_STATUS_ERROR;
1636 case XML_FINISHED:
1637 errorCode = XML_ERROR_FINISHED;
1638 return XML_STATUS_ERROR;
1639 case XML_INITIALIZED:
1640 if (parentParser == NULL && !startParsing(parser)) {
1641 errorCode = XML_ERROR_NO_MEMORY;
1642 return XML_STATUS_ERROR;
1644 default:
1645 ps_parsing = XML_PARSING;
1648 start = bufferPtr;
1649 positionPtr = start;
1650 bufferEnd += len;
1651 parseEndPtr = bufferEnd;
1652 parseEndByteIndex += len;
1653 ps_finalBuffer = (XML_Bool)isFinal;
1655 errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
1657 if (errorCode != XML_ERROR_NONE) {
1658 eventEndPtr = eventPtr;
1659 processor = errorProcessor;
1660 return XML_STATUS_ERROR;
1662 else {
1663 switch (ps_parsing) {
1664 case XML_SUSPENDED:
1665 result = XML_STATUS_SUSPENDED;
1666 break;
1667 case XML_INITIALIZED:
1668 case XML_PARSING:
1669 if (isFinal) {
1670 ps_parsing = XML_FINISHED;
1671 return result;
1673 default: ; /* should not happen */
1677 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1678 positionPtr = bufferPtr;
1679 return result;
1682 void * XMLCALL
1683 XML_GetBuffer(XML_Parser parser, int len)
1685 switch (ps_parsing) {
1686 case XML_SUSPENDED:
1687 errorCode = XML_ERROR_SUSPENDED;
1688 return NULL;
1689 case XML_FINISHED:
1690 errorCode = XML_ERROR_FINISHED;
1691 return NULL;
1692 default: ;
1695 if (len > bufferLim - bufferEnd) {
1696 /* FIXME avoid integer overflow */
1697 int neededSize = len + (int)(bufferEnd - bufferPtr);
1698 #ifdef XML_CONTEXT_BYTES
1699 int keep = (int)(bufferPtr - buffer);
1701 if (keep > XML_CONTEXT_BYTES)
1702 keep = XML_CONTEXT_BYTES;
1703 neededSize += keep;
1704 #endif /* defined XML_CONTEXT_BYTES */
1705 if (neededSize <= bufferLim - buffer) {
1706 #ifdef XML_CONTEXT_BYTES
1707 if (keep < bufferPtr - buffer) {
1708 int offset = (int)(bufferPtr - buffer) - keep;
1709 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
1710 bufferEnd -= offset;
1711 bufferPtr -= offset;
1713 #else
1714 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
1715 bufferEnd = buffer + (bufferEnd - bufferPtr);
1716 bufferPtr = buffer;
1717 #endif /* not defined XML_CONTEXT_BYTES */
1719 else {
1720 char *newBuf;
1721 int bufferSize = (int)(bufferLim - bufferPtr);
1722 if (bufferSize == 0)
1723 bufferSize = INIT_BUFFER_SIZE;
1724 do {
1725 bufferSize *= 2;
1726 } while (bufferSize < neededSize);
1727 newBuf = (char *)MALLOC(bufferSize);
1728 if (newBuf == 0) {
1729 errorCode = XML_ERROR_NO_MEMORY;
1730 return NULL;
1732 bufferLim = newBuf + bufferSize;
1733 #ifdef XML_CONTEXT_BYTES
1734 if (bufferPtr) {
1735 int keep = (int)(bufferPtr - buffer);
1736 if (keep > XML_CONTEXT_BYTES)
1737 keep = XML_CONTEXT_BYTES;
1738 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
1739 FREE(buffer);
1740 buffer = newBuf;
1741 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
1742 bufferPtr = buffer + keep;
1744 else {
1745 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1746 bufferPtr = buffer = newBuf;
1748 #else
1749 if (bufferPtr) {
1750 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
1751 FREE(buffer);
1753 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1754 bufferPtr = buffer = newBuf;
1755 #endif /* not defined XML_CONTEXT_BYTES */
1757 eventPtr = eventEndPtr = NULL;
1758 positionPtr = NULL;
1760 return bufferEnd;
1763 enum XML_Status XMLCALL
1764 XML_StopParser(XML_Parser parser, XML_Bool resumable)
1766 switch (ps_parsing) {
1767 case XML_SUSPENDED:
1768 if (resumable) {
1769 errorCode = XML_ERROR_SUSPENDED;
1770 return XML_STATUS_ERROR;
1772 ps_parsing = XML_FINISHED;
1773 break;
1774 case XML_FINISHED:
1775 errorCode = XML_ERROR_FINISHED;
1776 return XML_STATUS_ERROR;
1777 default:
1778 if (resumable) {
1779 #ifdef XML_DTD
1780 if (isParamEntity) {
1781 errorCode = XML_ERROR_SUSPEND_PE;
1782 return XML_STATUS_ERROR;
1784 #endif
1785 ps_parsing = XML_SUSPENDED;
1787 else
1788 ps_parsing = XML_FINISHED;
1790 return XML_STATUS_OK;
1793 enum XML_Status XMLCALL
1794 XML_ResumeParser(XML_Parser parser)
1796 enum XML_Status result = XML_STATUS_OK;
1798 if (ps_parsing != XML_SUSPENDED) {
1799 errorCode = XML_ERROR_NOT_SUSPENDED;
1800 return XML_STATUS_ERROR;
1802 ps_parsing = XML_PARSING;
1804 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1806 if (errorCode != XML_ERROR_NONE) {
1807 eventEndPtr = eventPtr;
1808 processor = errorProcessor;
1809 return XML_STATUS_ERROR;
1811 else {
1812 switch (ps_parsing) {
1813 case XML_SUSPENDED:
1814 result = XML_STATUS_SUSPENDED;
1815 break;
1816 case XML_INITIALIZED:
1817 case XML_PARSING:
1818 if (ps_finalBuffer) {
1819 ps_parsing = XML_FINISHED;
1820 return result;
1822 default: ;
1826 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1827 positionPtr = bufferPtr;
1828 return result;
1831 void XMLCALL
1832 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
1834 assert(status != NULL);
1835 *status = parser->m_parsingStatus;
1838 enum XML_Error XMLCALL
1839 XML_GetErrorCode(XML_Parser parser)
1841 return errorCode;
1844 XML_Index XMLCALL
1845 XML_GetCurrentByteIndex(XML_Parser parser)
1847 if (eventPtr)
1848 return parseEndByteIndex - (parseEndPtr - eventPtr);
1849 return -1;
1852 int XMLCALL
1853 XML_GetCurrentByteCount(XML_Parser parser)
1855 if (eventEndPtr && eventPtr)
1856 return (int)(eventEndPtr - eventPtr);
1857 return 0;
1860 const char * XMLCALL
1861 XML_GetInputContext(XML_Parser parser, int *offset, int *size)
1863 #ifdef XML_CONTEXT_BYTES
1864 if (eventPtr && buffer) {
1865 *offset = (int)(eventPtr - buffer);
1866 *size = (int)(bufferEnd - buffer);
1867 return buffer;
1869 #endif /* defined XML_CONTEXT_BYTES */
1870 return (char *) 0;
1873 XML_Size XMLCALL
1874 XML_GetCurrentLineNumber(XML_Parser parser)
1876 if (eventPtr && eventPtr >= positionPtr) {
1877 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1878 positionPtr = eventPtr;
1880 return position.lineNumber + 1;
1883 XML_Size XMLCALL
1884 XML_GetCurrentColumnNumber(XML_Parser parser)
1886 if (eventPtr && eventPtr >= positionPtr) {
1887 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1888 positionPtr = eventPtr;
1890 return position.columnNumber;
1893 void XMLCALL
1894 XML_FreeContentModel(XML_Parser parser, XML_Content *model)
1896 FREE(model);
1899 void * XMLCALL
1900 XML_MemMalloc(XML_Parser parser, size_t size)
1902 return MALLOC(size);
1905 void * XMLCALL
1906 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
1908 return REALLOC(ptr, size);
1911 void XMLCALL
1912 XML_MemFree(XML_Parser parser, void *ptr)
1914 FREE(ptr);
1917 void XMLCALL
1918 XML_DefaultCurrent(XML_Parser parser)
1920 if (defaultHandler) {
1921 if (openInternalEntities)
1922 reportDefault(parser,
1923 internalEncoding,
1924 openInternalEntities->internalEventPtr,
1925 openInternalEntities->internalEventEndPtr);
1926 else
1927 reportDefault(parser, encoding, eventPtr, eventEndPtr);
1931 const XML_LChar * XMLCALL
1932 XML_ErrorString(enum XML_Error code)
1934 static const XML_LChar* const message[] = {
1936 XML_L("out of memory"),
1937 XML_L("syntax error"),
1938 XML_L("no element found"),
1939 XML_L("not well-formed (invalid token)"),
1940 XML_L("unclosed token"),
1941 XML_L("partial character"),
1942 XML_L("mismatched tag"),
1943 XML_L("duplicate attribute"),
1944 XML_L("junk after document element"),
1945 XML_L("illegal parameter entity reference"),
1946 XML_L("undefined entity"),
1947 XML_L("recursive entity reference"),
1948 XML_L("asynchronous entity"),
1949 XML_L("reference to invalid character number"),
1950 XML_L("reference to binary entity"),
1951 XML_L("reference to external entity in attribute"),
1952 XML_L("XML or text declaration not at start of entity"),
1953 XML_L("unknown encoding"),
1954 XML_L("encoding specified in XML declaration is incorrect"),
1955 XML_L("unclosed CDATA section"),
1956 XML_L("error in processing external entity reference"),
1957 XML_L("document is not standalone"),
1958 XML_L("unexpected parser state - please send a bug report"),
1959 XML_L("entity declared in parameter entity"),
1960 XML_L("requested feature requires XML_DTD support in Expat"),
1961 XML_L("cannot change setting once parsing has begun"),
1962 XML_L("unbound prefix"),
1963 XML_L("must not undeclare prefix"),
1964 XML_L("incomplete markup in parameter entity"),
1965 XML_L("XML declaration not well-formed"),
1966 XML_L("text declaration not well-formed"),
1967 XML_L("illegal character(s) in public id"),
1968 XML_L("parser suspended"),
1969 XML_L("parser not suspended"),
1970 XML_L("parsing aborted"),
1971 XML_L("parsing finished"),
1972 XML_L("cannot suspend in external parameter entity"),
1973 XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
1974 XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
1975 XML_L("prefix must not be bound to one of the reserved namespace names")
1977 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1978 return message[code];
1979 return NULL;
1982 const XML_LChar * XMLCALL
1983 XML_ExpatVersion(void) {
1985 /* V1 is used to string-ize the version number. However, it would
1986 string-ize the actual version macro *names* unless we get them
1987 substituted before being passed to V1. CPP is defined to expand
1988 a macro, then rescan for more expansions. Thus, we use V2 to expand
1989 the version macros, then CPP will expand the resulting V1() macro
1990 with the correct numerals. */
1991 /* ### I'm assuming cpp is portable in this respect... */
1993 #define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
1994 #define V2(a,b,c) XML_L("expat_")V1(a,b,c)
1996 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
1998 #undef V1
1999 #undef V2
2002 XML_Expat_Version XMLCALL
2003 XML_ExpatVersionInfo(void)
2005 XML_Expat_Version version;
2007 version.major = XML_MAJOR_VERSION;
2008 version.minor = XML_MINOR_VERSION;
2009 version.micro = XML_MICRO_VERSION;
2011 return version;
2014 const XML_Feature * XMLCALL
2015 XML_GetFeatureList(void)
2017 static const XML_Feature features[] = {
2018 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2019 sizeof(XML_Char)},
2020 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2021 sizeof(XML_LChar)},
2022 #ifdef XML_UNICODE
2023 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2024 #endif
2025 #ifdef XML_UNICODE_WCHAR_T
2026 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2027 #endif
2028 #ifdef XML_DTD
2029 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2030 #endif
2031 #ifdef XML_CONTEXT_BYTES
2032 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2033 XML_CONTEXT_BYTES},
2034 #endif
2035 #ifdef XML_MIN_SIZE
2036 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2037 #endif
2038 #ifdef XML_NS
2039 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2040 #endif
2041 #ifdef XML_LARGE_SIZE
2042 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2043 #endif
2044 #ifdef XML_ATTR_INFO
2045 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2046 #endif
2047 {XML_FEATURE_END, NULL, 0}
2050 return features;
2053 /* Initially tag->rawName always points into the parse buffer;
2054 for those TAG instances opened while the current parse buffer was
2055 processed, and not yet closed, we need to store tag->rawName in a more
2056 permanent location, since the parse buffer is about to be discarded.
2058 static XML_Bool
2059 storeRawNames(XML_Parser parser)
2061 TAG *tag = tagStack;
2062 while (tag) {
2063 int bufSize;
2064 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2065 char *rawNameBuf = tag->buf + nameLen;
2066 /* Stop if already stored. Since tagStack is a stack, we can stop
2067 at the first entry that has already been copied; everything
2068 below it in the stack is already been accounted for in a
2069 previous call to this function.
2071 if (tag->rawName == rawNameBuf)
2072 break;
2073 /* For re-use purposes we need to ensure that the
2074 size of tag->buf is a multiple of sizeof(XML_Char).
2076 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2077 if (bufSize > tag->bufEnd - tag->buf) {
2078 char *temp = (char *)REALLOC(tag->buf, bufSize);
2079 if (temp == NULL)
2080 return XML_FALSE;
2081 /* if tag->name.str points to tag->buf (only when namespace
2082 processing is off) then we have to update it
2084 if (tag->name.str == (XML_Char *)tag->buf)
2085 tag->name.str = (XML_Char *)temp;
2086 /* if tag->name.localPart is set (when namespace processing is on)
2087 then update it as well, since it will always point into tag->buf
2089 if (tag->name.localPart)
2090 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2091 (XML_Char *)tag->buf);
2092 tag->buf = temp;
2093 tag->bufEnd = temp + bufSize;
2094 rawNameBuf = temp + nameLen;
2096 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2097 tag->rawName = rawNameBuf;
2098 tag = tag->parent;
2100 return XML_TRUE;
2103 static enum XML_Error PTRCALL
2104 contentProcessor(XML_Parser parser,
2105 const char *start,
2106 const char *end,
2107 const char **endPtr)
2109 enum XML_Error result = doContent(parser, 0, encoding, start, end,
2110 endPtr, (XML_Bool)!ps_finalBuffer);
2111 if (result == XML_ERROR_NONE) {
2112 if (!storeRawNames(parser))
2113 return XML_ERROR_NO_MEMORY;
2115 return result;
2118 static enum XML_Error PTRCALL
2119 externalEntityInitProcessor(XML_Parser parser,
2120 const char *start,
2121 const char *end,
2122 const char **endPtr)
2124 enum XML_Error result = initializeEncoding(parser);
2125 if (result != XML_ERROR_NONE)
2126 return result;
2127 processor = externalEntityInitProcessor2;
2128 return externalEntityInitProcessor2(parser, start, end, endPtr);
2131 static enum XML_Error PTRCALL
2132 externalEntityInitProcessor2(XML_Parser parser,
2133 const char *start,
2134 const char *end,
2135 const char **endPtr)
2137 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2138 int tok = XmlContentTok(encoding, start, end, &next);
2139 switch (tok) {
2140 case XML_TOK_BOM:
2141 /* If we are at the end of the buffer, this would cause the next stage,
2142 i.e. externalEntityInitProcessor3, to pass control directly to
2143 doContent (by detecting XML_TOK_NONE) without processing any xml text
2144 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2146 if (next == end && !ps_finalBuffer) {
2147 *endPtr = next;
2148 return XML_ERROR_NONE;
2150 start = next;
2151 break;
2152 case XML_TOK_PARTIAL:
2153 if (!ps_finalBuffer) {
2154 *endPtr = start;
2155 return XML_ERROR_NONE;
2157 eventPtr = start;
2158 return XML_ERROR_UNCLOSED_TOKEN;
2159 case XML_TOK_PARTIAL_CHAR:
2160 if (!ps_finalBuffer) {
2161 *endPtr = start;
2162 return XML_ERROR_NONE;
2164 eventPtr = start;
2165 return XML_ERROR_PARTIAL_CHAR;
2167 processor = externalEntityInitProcessor3;
2168 return externalEntityInitProcessor3(parser, start, end, endPtr);
2171 static enum XML_Error PTRCALL
2172 externalEntityInitProcessor3(XML_Parser parser,
2173 const char *start,
2174 const char *end,
2175 const char **endPtr)
2177 int tok;
2178 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2179 eventPtr = start;
2180 tok = XmlContentTok(encoding, start, end, &next);
2181 eventEndPtr = next;
2183 switch (tok) {
2184 case XML_TOK_XML_DECL:
2186 enum XML_Error result;
2187 result = processXmlDecl(parser, 1, start, next);
2188 if (result != XML_ERROR_NONE)
2189 return result;
2190 switch (ps_parsing) {
2191 case XML_SUSPENDED:
2192 *endPtr = next;
2193 return XML_ERROR_NONE;
2194 case XML_FINISHED:
2195 return XML_ERROR_ABORTED;
2196 default:
2197 start = next;
2200 break;
2201 case XML_TOK_PARTIAL:
2202 if (!ps_finalBuffer) {
2203 *endPtr = start;
2204 return XML_ERROR_NONE;
2206 return XML_ERROR_UNCLOSED_TOKEN;
2207 case XML_TOK_PARTIAL_CHAR:
2208 if (!ps_finalBuffer) {
2209 *endPtr = start;
2210 return XML_ERROR_NONE;
2212 return XML_ERROR_PARTIAL_CHAR;
2214 processor = externalEntityContentProcessor;
2215 tagLevel = 1;
2216 return externalEntityContentProcessor(parser, start, end, endPtr);
2219 static enum XML_Error PTRCALL
2220 externalEntityContentProcessor(XML_Parser parser,
2221 const char *start,
2222 const char *end,
2223 const char **endPtr)
2225 enum XML_Error result = doContent(parser, 1, encoding, start, end,
2226 endPtr, (XML_Bool)!ps_finalBuffer);
2227 if (result == XML_ERROR_NONE) {
2228 if (!storeRawNames(parser))
2229 return XML_ERROR_NO_MEMORY;
2231 return result;
2234 static enum XML_Error
2235 doContent(XML_Parser parser,
2236 int startTagLevel,
2237 const ENCODING *enc,
2238 const char *s,
2239 const char *end,
2240 const char **nextPtr,
2241 XML_Bool haveMore)
2243 /* save one level of indirection */
2244 DTD * const dtd = _dtd;
2246 const char **eventPP;
2247 const char **eventEndPP;
2248 if (enc == encoding) {
2249 eventPP = &eventPtr;
2250 eventEndPP = &eventEndPtr;
2252 else {
2253 eventPP = &(openInternalEntities->internalEventPtr);
2254 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2256 *eventPP = s;
2258 for (;;) {
2259 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2260 int tok = XmlContentTok(enc, s, end, &next);
2261 *eventEndPP = next;
2262 switch (tok) {
2263 case XML_TOK_TRAILING_CR:
2264 if (haveMore) {
2265 *nextPtr = s;
2266 return XML_ERROR_NONE;
2268 *eventEndPP = end;
2269 if (characterDataHandler) {
2270 XML_Char c = 0xA;
2271 characterDataHandler(handlerArg, &c, 1);
2273 else if (defaultHandler)
2274 reportDefault(parser, enc, s, end);
2275 /* We are at the end of the final buffer, should we check for
2276 XML_SUSPENDED, XML_FINISHED?
2278 if (startTagLevel == 0)
2279 return XML_ERROR_NO_ELEMENTS;
2280 if (tagLevel != startTagLevel)
2281 return XML_ERROR_ASYNC_ENTITY;
2282 *nextPtr = end;
2283 return XML_ERROR_NONE;
2284 case XML_TOK_NONE:
2285 if (haveMore) {
2286 *nextPtr = s;
2287 return XML_ERROR_NONE;
2289 if (startTagLevel > 0) {
2290 if (tagLevel != startTagLevel)
2291 return XML_ERROR_ASYNC_ENTITY;
2292 *nextPtr = s;
2293 return XML_ERROR_NONE;
2295 return XML_ERROR_NO_ELEMENTS;
2296 case XML_TOK_INVALID:
2297 *eventPP = next;
2298 return XML_ERROR_INVALID_TOKEN;
2299 case XML_TOK_PARTIAL:
2300 if (haveMore) {
2301 *nextPtr = s;
2302 return XML_ERROR_NONE;
2304 return XML_ERROR_UNCLOSED_TOKEN;
2305 case XML_TOK_PARTIAL_CHAR:
2306 if (haveMore) {
2307 *nextPtr = s;
2308 return XML_ERROR_NONE;
2310 return XML_ERROR_PARTIAL_CHAR;
2311 case XML_TOK_ENTITY_REF:
2313 const XML_Char *name;
2314 ENTITY *entity;
2315 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2316 s + enc->minBytesPerChar,
2317 next - enc->minBytesPerChar);
2318 if (ch) {
2319 if (characterDataHandler)
2320 characterDataHandler(handlerArg, &ch, 1);
2321 else if (defaultHandler)
2322 reportDefault(parser, enc, s, next);
2323 break;
2325 name = poolStoreString(&dtd->pool, enc,
2326 s + enc->minBytesPerChar,
2327 next - enc->minBytesPerChar);
2328 if (!name)
2329 return XML_ERROR_NO_MEMORY;
2330 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2331 poolDiscard(&dtd->pool);
2332 /* First, determine if a check for an existing declaration is needed;
2333 if yes, check that the entity exists, and that it is internal,
2334 otherwise call the skipped entity or default handler.
2336 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2337 if (!entity)
2338 return XML_ERROR_UNDEFINED_ENTITY;
2339 else if (!entity->is_internal)
2340 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2342 else if (!entity) {
2343 if (skippedEntityHandler)
2344 skippedEntityHandler(handlerArg, name, 0);
2345 else if (defaultHandler)
2346 reportDefault(parser, enc, s, next);
2347 break;
2349 if (entity->open)
2350 return XML_ERROR_RECURSIVE_ENTITY_REF;
2351 if (entity->notation)
2352 return XML_ERROR_BINARY_ENTITY_REF;
2353 if (entity->textPtr) {
2354 enum XML_Error result;
2355 if (!defaultExpandInternalEntities) {
2356 if (skippedEntityHandler)
2357 skippedEntityHandler(handlerArg, entity->name, 0);
2358 else if (defaultHandler)
2359 reportDefault(parser, enc, s, next);
2360 break;
2362 result = processInternalEntity(parser, entity, XML_FALSE);
2363 if (result != XML_ERROR_NONE)
2364 return result;
2366 else if (externalEntityRefHandler) {
2367 const XML_Char *context;
2368 entity->open = XML_TRUE;
2369 context = getContext(parser);
2370 entity->open = XML_FALSE;
2371 if (!context)
2372 return XML_ERROR_NO_MEMORY;
2373 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2374 context,
2375 entity->base,
2376 entity->systemId,
2377 entity->publicId))
2378 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2379 poolDiscard(&tempPool);
2381 else if (defaultHandler)
2382 reportDefault(parser, enc, s, next);
2383 break;
2385 case XML_TOK_START_TAG_NO_ATTS:
2386 /* fall through */
2387 case XML_TOK_START_TAG_WITH_ATTS:
2389 TAG *tag;
2390 enum XML_Error result;
2391 XML_Char *toPtr;
2392 if (freeTagList) {
2393 tag = freeTagList;
2394 freeTagList = freeTagList->parent;
2396 else {
2397 tag = (TAG *)MALLOC(sizeof(TAG));
2398 if (!tag)
2399 return XML_ERROR_NO_MEMORY;
2400 tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2401 if (!tag->buf) {
2402 FREE(tag);
2403 return XML_ERROR_NO_MEMORY;
2405 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2407 tag->bindings = NULL;
2408 tag->parent = tagStack;
2409 tagStack = tag;
2410 tag->name.localPart = NULL;
2411 tag->name.prefix = NULL;
2412 tag->rawName = s + enc->minBytesPerChar;
2413 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2414 ++tagLevel;
2416 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2417 const char *fromPtr = tag->rawName;
2418 toPtr = (XML_Char *)tag->buf;
2419 for (;;) {
2420 int bufSize;
2421 int convLen;
2422 XmlConvert(enc,
2423 &fromPtr, rawNameEnd,
2424 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
2425 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2426 if (fromPtr == rawNameEnd) {
2427 tag->name.strLen = convLen;
2428 break;
2430 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2432 char *temp = (char *)REALLOC(tag->buf, bufSize);
2433 if (temp == NULL)
2434 return XML_ERROR_NO_MEMORY;
2435 tag->buf = temp;
2436 tag->bufEnd = temp + bufSize;
2437 toPtr = (XML_Char *)temp + convLen;
2441 tag->name.str = (XML_Char *)tag->buf;
2442 *toPtr = XML_T('\0');
2443 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2444 if (result)
2445 return result;
2446 if (startElementHandler)
2447 startElementHandler(handlerArg, tag->name.str,
2448 (const XML_Char **)atts);
2449 else if (defaultHandler)
2450 reportDefault(parser, enc, s, next);
2451 poolClear(&tempPool);
2452 break;
2454 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2455 /* fall through */
2456 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2458 const char *rawName = s + enc->minBytesPerChar;
2459 enum XML_Error result;
2460 BINDING *bindings = NULL;
2461 XML_Bool noElmHandlers = XML_TRUE;
2462 TAG_NAME name;
2463 name.str = poolStoreString(&tempPool, enc, rawName,
2464 rawName + XmlNameLength(enc, rawName));
2465 if (!name.str)
2466 return XML_ERROR_NO_MEMORY;
2467 poolFinish(&tempPool);
2468 result = storeAtts(parser, enc, s, &name, &bindings);
2469 if (result)
2470 return result;
2471 poolFinish(&tempPool);
2472 if (startElementHandler) {
2473 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2474 noElmHandlers = XML_FALSE;
2476 if (endElementHandler) {
2477 if (startElementHandler)
2478 *eventPP = *eventEndPP;
2479 endElementHandler(handlerArg, name.str);
2480 noElmHandlers = XML_FALSE;
2482 if (noElmHandlers && defaultHandler)
2483 reportDefault(parser, enc, s, next);
2484 poolClear(&tempPool);
2485 while (bindings) {
2486 BINDING *b = bindings;
2487 if (endNamespaceDeclHandler)
2488 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2489 bindings = bindings->nextTagBinding;
2490 b->nextTagBinding = freeBindingList;
2491 freeBindingList = b;
2492 b->prefix->binding = b->prevPrefixBinding;
2495 if (tagLevel == 0)
2496 return epilogProcessor(parser, next, end, nextPtr);
2497 break;
2498 case XML_TOK_END_TAG:
2499 if (tagLevel == startTagLevel)
2500 return XML_ERROR_ASYNC_ENTITY;
2501 else {
2502 int len;
2503 const char *rawName;
2504 TAG *tag = tagStack;
2505 tagStack = tag->parent;
2506 tag->parent = freeTagList;
2507 freeTagList = tag;
2508 rawName = s + enc->minBytesPerChar*2;
2509 len = XmlNameLength(enc, rawName);
2510 if (len != tag->rawNameLength
2511 || memcmp(tag->rawName, rawName, len) != 0) {
2512 *eventPP = rawName;
2513 return XML_ERROR_TAG_MISMATCH;
2515 --tagLevel;
2516 if (endElementHandler) {
2517 const XML_Char *localPart;
2518 const XML_Char *prefix;
2519 XML_Char *uri;
2520 localPart = tag->name.localPart;
2521 if (ns && localPart) {
2522 /* localPart and prefix may have been overwritten in
2523 tag->name.str, since this points to the binding->uri
2524 buffer which gets re-used; so we have to add them again
2526 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2527 /* don't need to check for space - already done in storeAtts() */
2528 while (*localPart) *uri++ = *localPart++;
2529 prefix = (XML_Char *)tag->name.prefix;
2530 if (ns_triplets && prefix) {
2531 *uri++ = namespaceSeparator;
2532 while (*prefix) *uri++ = *prefix++;
2534 *uri = XML_T('\0');
2536 endElementHandler(handlerArg, tag->name.str);
2538 else if (defaultHandler)
2539 reportDefault(parser, enc, s, next);
2540 while (tag->bindings) {
2541 BINDING *b = tag->bindings;
2542 if (endNamespaceDeclHandler)
2543 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2544 tag->bindings = tag->bindings->nextTagBinding;
2545 b->nextTagBinding = freeBindingList;
2546 freeBindingList = b;
2547 b->prefix->binding = b->prevPrefixBinding;
2549 if (tagLevel == 0)
2550 return epilogProcessor(parser, next, end, nextPtr);
2552 break;
2553 case XML_TOK_CHAR_REF:
2555 int n = XmlCharRefNumber(enc, s);
2556 if (n < 0)
2557 return XML_ERROR_BAD_CHAR_REF;
2558 if (characterDataHandler) {
2559 XML_Char buf[XML_ENCODE_MAX];
2560 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
2562 else if (defaultHandler)
2563 reportDefault(parser, enc, s, next);
2565 break;
2566 case XML_TOK_XML_DECL:
2567 return XML_ERROR_MISPLACED_XML_PI;
2568 case XML_TOK_DATA_NEWLINE:
2569 if (characterDataHandler) {
2570 XML_Char c = 0xA;
2571 characterDataHandler(handlerArg, &c, 1);
2573 else if (defaultHandler)
2574 reportDefault(parser, enc, s, next);
2575 break;
2576 case XML_TOK_CDATA_SECT_OPEN:
2578 enum XML_Error result;
2579 if (startCdataSectionHandler)
2580 startCdataSectionHandler(handlerArg);
2581 #if 0
2582 /* Suppose you doing a transformation on a document that involves
2583 changing only the character data. You set up a defaultHandler
2584 and a characterDataHandler. The defaultHandler simply copies
2585 characters through. The characterDataHandler does the
2586 transformation and writes the characters out escaping them as
2587 necessary. This case will fail to work if we leave out the
2588 following two lines (because & and < inside CDATA sections will
2589 be incorrectly escaped).
2591 However, now we have a start/endCdataSectionHandler, so it seems
2592 easier to let the user deal with this.
2594 else if (characterDataHandler)
2595 characterDataHandler(handlerArg, dataBuf, 0);
2596 #endif
2597 else if (defaultHandler)
2598 reportDefault(parser, enc, s, next);
2599 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2600 if (result != XML_ERROR_NONE)
2601 return result;
2602 else if (!next) {
2603 processor = cdataSectionProcessor;
2604 return result;
2607 break;
2608 case XML_TOK_TRAILING_RSQB:
2609 if (haveMore) {
2610 *nextPtr = s;
2611 return XML_ERROR_NONE;
2613 if (characterDataHandler) {
2614 if (MUST_CONVERT(enc, s)) {
2615 ICHAR *dataPtr = (ICHAR *)dataBuf;
2616 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2617 characterDataHandler(handlerArg, dataBuf,
2618 (int)(dataPtr - (ICHAR *)dataBuf));
2620 else
2621 characterDataHandler(handlerArg,
2622 (XML_Char *)s,
2623 (int)((XML_Char *)end - (XML_Char *)s));
2625 else if (defaultHandler)
2626 reportDefault(parser, enc, s, end);
2627 /* We are at the end of the final buffer, should we check for
2628 XML_SUSPENDED, XML_FINISHED?
2630 if (startTagLevel == 0) {
2631 *eventPP = end;
2632 return XML_ERROR_NO_ELEMENTS;
2634 if (tagLevel != startTagLevel) {
2635 *eventPP = end;
2636 return XML_ERROR_ASYNC_ENTITY;
2638 *nextPtr = end;
2639 return XML_ERROR_NONE;
2640 case XML_TOK_DATA_CHARS:
2642 XML_CharacterDataHandler charDataHandler = characterDataHandler;
2643 if (charDataHandler) {
2644 if (MUST_CONVERT(enc, s)) {
2645 for (;;) {
2646 ICHAR *dataPtr = (ICHAR *)dataBuf;
2647 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
2648 *eventEndPP = s;
2649 charDataHandler(handlerArg, dataBuf,
2650 (int)(dataPtr - (ICHAR *)dataBuf));
2651 if (s == next)
2652 break;
2653 *eventPP = s;
2656 else
2657 charDataHandler(handlerArg,
2658 (XML_Char *)s,
2659 (int)((XML_Char *)next - (XML_Char *)s));
2661 else if (defaultHandler)
2662 reportDefault(parser, enc, s, next);
2664 break;
2665 case XML_TOK_PI:
2666 if (!reportProcessingInstruction(parser, enc, s, next))
2667 return XML_ERROR_NO_MEMORY;
2668 break;
2669 case XML_TOK_COMMENT:
2670 if (!reportComment(parser, enc, s, next))
2671 return XML_ERROR_NO_MEMORY;
2672 break;
2673 default:
2674 if (defaultHandler)
2675 reportDefault(parser, enc, s, next);
2676 break;
2678 *eventPP = s = next;
2679 switch (ps_parsing) {
2680 case XML_SUSPENDED:
2681 *nextPtr = next;
2682 return XML_ERROR_NONE;
2683 case XML_FINISHED:
2684 return XML_ERROR_ABORTED;
2685 default: ;
2688 /* not reached */
2691 /* Precondition: all arguments must be non-NULL;
2692 Purpose:
2693 - normalize attributes
2694 - check attributes for well-formedness
2695 - generate namespace aware attribute names (URI, prefix)
2696 - build list of attributes for startElementHandler
2697 - default attributes
2698 - process namespace declarations (check and report them)
2699 - generate namespace aware element name (URI, prefix)
2701 static enum XML_Error
2702 storeAtts(XML_Parser parser, const ENCODING *enc,
2703 const char *attStr, TAG_NAME *tagNamePtr,
2704 BINDING **bindingsPtr)
2706 DTD * const dtd = _dtd; /* save one level of indirection */
2707 ELEMENT_TYPE *elementType;
2708 int nDefaultAtts;
2709 const XML_Char **appAtts; /* the attribute list for the application */
2710 int attIndex = 0;
2711 int prefixLen;
2712 int i;
2713 int n;
2714 XML_Char *uri;
2715 int nPrefixes = 0;
2716 BINDING *binding;
2717 const XML_Char *localPart;
2719 /* lookup the element type name */
2720 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
2721 if (!elementType) {
2722 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
2723 if (!name)
2724 return XML_ERROR_NO_MEMORY;
2725 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
2726 sizeof(ELEMENT_TYPE));
2727 if (!elementType)
2728 return XML_ERROR_NO_MEMORY;
2729 if (ns && !setElementTypePrefix(parser, elementType))
2730 return XML_ERROR_NO_MEMORY;
2732 nDefaultAtts = elementType->nDefaultAtts;
2734 /* get the attributes from the tokenizer */
2735 n = XmlGetAttributes(enc, attStr, attsSize, atts);
2736 if (n + nDefaultAtts > attsSize) {
2737 int oldAttsSize = attsSize;
2738 ATTRIBUTE *temp;
2739 #ifdef XML_ATTR_INFO
2740 XML_AttrInfo *temp2;
2741 #endif
2742 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
2743 temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
2744 if (temp == NULL)
2745 return XML_ERROR_NO_MEMORY;
2746 atts = temp;
2747 #ifdef XML_ATTR_INFO
2748 temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
2749 if (temp2 == NULL)
2750 return XML_ERROR_NO_MEMORY;
2751 attInfo = temp2;
2752 #endif
2753 if (n > oldAttsSize)
2754 XmlGetAttributes(enc, attStr, n, atts);
2757 appAtts = (const XML_Char **)atts;
2758 for (i = 0; i < n; i++) {
2759 ATTRIBUTE *currAtt = &atts[i];
2760 #ifdef XML_ATTR_INFO
2761 XML_AttrInfo *currAttInfo = &attInfo[i];
2762 #endif
2763 /* add the name and value to the attribute list */
2764 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
2765 currAtt->name
2766 + XmlNameLength(enc, currAtt->name));
2767 if (!attId)
2768 return XML_ERROR_NO_MEMORY;
2769 #ifdef XML_ATTR_INFO
2770 currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
2771 currAttInfo->nameEnd = currAttInfo->nameStart +
2772 XmlNameLength(enc, currAtt->name);
2773 currAttInfo->valueStart = parseEndByteIndex -
2774 (parseEndPtr - currAtt->valuePtr);
2775 currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
2776 #endif
2777 /* Detect duplicate attributes by their QNames. This does not work when
2778 namespace processing is turned on and different prefixes for the same
2779 namespace are used. For this case we have a check further down.
2781 if ((attId->name)[-1]) {
2782 if (enc == encoding)
2783 eventPtr = atts[i].name;
2784 return XML_ERROR_DUPLICATE_ATTRIBUTE;
2786 (attId->name)[-1] = 1;
2787 appAtts[attIndex++] = attId->name;
2788 if (!atts[i].normalized) {
2789 enum XML_Error result;
2790 XML_Bool isCdata = XML_TRUE;
2792 /* figure out whether declared as other than CDATA */
2793 if (attId->maybeTokenized) {
2794 int j;
2795 for (j = 0; j < nDefaultAtts; j++) {
2796 if (attId == elementType->defaultAtts[j].id) {
2797 isCdata = elementType->defaultAtts[j].isCdata;
2798 break;
2803 /* normalize the attribute value */
2804 result = storeAttributeValue(parser, enc, isCdata,
2805 atts[i].valuePtr, atts[i].valueEnd,
2806 &tempPool);
2807 if (result)
2808 return result;
2809 appAtts[attIndex] = poolStart(&tempPool);
2810 poolFinish(&tempPool);
2812 else {
2813 /* the value did not need normalizing */
2814 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
2815 atts[i].valueEnd);
2816 if (appAtts[attIndex] == 0)
2817 return XML_ERROR_NO_MEMORY;
2818 poolFinish(&tempPool);
2820 /* handle prefixed attribute names */
2821 if (attId->prefix) {
2822 if (attId->xmlns) {
2823 /* deal with namespace declarations here */
2824 enum XML_Error result = addBinding(parser, attId->prefix, attId,
2825 appAtts[attIndex], bindingsPtr);
2826 if (result)
2827 return result;
2828 --attIndex;
2830 else {
2831 /* deal with other prefixed names later */
2832 attIndex++;
2833 nPrefixes++;
2834 (attId->name)[-1] = 2;
2837 else
2838 attIndex++;
2841 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
2842 nSpecifiedAtts = attIndex;
2843 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
2844 for (i = 0; i < attIndex; i += 2)
2845 if (appAtts[i] == elementType->idAtt->name) {
2846 idAttIndex = i;
2847 break;
2850 else
2851 idAttIndex = -1;
2853 /* do attribute defaulting */
2854 for (i = 0; i < nDefaultAtts; i++) {
2855 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
2856 if (!(da->id->name)[-1] && da->value) {
2857 if (da->id->prefix) {
2858 if (da->id->xmlns) {
2859 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
2860 da->value, bindingsPtr);
2861 if (result)
2862 return result;
2864 else {
2865 (da->id->name)[-1] = 2;
2866 nPrefixes++;
2867 appAtts[attIndex++] = da->id->name;
2868 appAtts[attIndex++] = da->value;
2871 else {
2872 (da->id->name)[-1] = 1;
2873 appAtts[attIndex++] = da->id->name;
2874 appAtts[attIndex++] = da->value;
2878 appAtts[attIndex] = 0;
2880 /* expand prefixed attribute names, check for duplicates,
2881 and clear flags that say whether attributes were specified */
2882 i = 0;
2883 if (nPrefixes) {
2884 int j; /* hash table index */
2885 unsigned long version = nsAttsVersion;
2886 int nsAttsSize = (int)1 << nsAttsPower;
2887 /* size of hash table must be at least 2 * (# of prefixed attributes) */
2888 if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
2889 NS_ATT *temp;
2890 /* hash table size must also be a power of 2 and >= 8 */
2891 while (nPrefixes >> nsAttsPower++);
2892 if (nsAttsPower < 3)
2893 nsAttsPower = 3;
2894 nsAttsSize = (int)1 << nsAttsPower;
2895 temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
2896 if (!temp)
2897 return XML_ERROR_NO_MEMORY;
2898 nsAtts = temp;
2899 version = 0; /* force re-initialization of nsAtts hash table */
2901 /* using a version flag saves us from initializing nsAtts every time */
2902 if (!version) { /* initialize version flags when version wraps around */
2903 version = INIT_ATTS_VERSION;
2904 for (j = nsAttsSize; j != 0; )
2905 nsAtts[--j].version = version;
2907 nsAttsVersion = --version;
2909 /* expand prefixed names and check for duplicates */
2910 for (; i < attIndex; i += 2) {
2911 const XML_Char *s = appAtts[i];
2912 if (s[-1] == 2) { /* prefixed */
2913 ATTRIBUTE_ID *id;
2914 const BINDING *b;
2915 unsigned long uriHash = hash_secret_salt;
2916 ((XML_Char *)s)[-1] = 0; /* clear flag */
2917 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
2918 b = id->prefix->binding;
2919 if (!b)
2920 return XML_ERROR_UNBOUND_PREFIX;
2922 /* as we expand the name we also calculate its hash value */
2923 for (j = 0; j < b->uriLen; j++) {
2924 const XML_Char c = b->uri[j];
2925 if (!poolAppendChar(&tempPool, c))
2926 return XML_ERROR_NO_MEMORY;
2927 uriHash = CHAR_HASH(uriHash, c);
2929 while (*s++ != XML_T(ASCII_COLON))
2931 do { /* copies null terminator */
2932 const XML_Char c = *s;
2933 if (!poolAppendChar(&tempPool, *s))
2934 return XML_ERROR_NO_MEMORY;
2935 uriHash = CHAR_HASH(uriHash, c);
2936 } while (*s++);
2938 { /* Check hash table for duplicate of expanded name (uriName).
2939 Derived from code in lookup(parser, HASH_TABLE *table, ...).
2941 unsigned char step = 0;
2942 unsigned long mask = nsAttsSize - 1;
2943 j = uriHash & mask; /* index into hash table */
2944 while (nsAtts[j].version == version) {
2945 /* for speed we compare stored hash values first */
2946 if (uriHash == nsAtts[j].hash) {
2947 const XML_Char *s1 = poolStart(&tempPool);
2948 const XML_Char *s2 = nsAtts[j].uriName;
2949 /* s1 is null terminated, but not s2 */
2950 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
2951 if (*s1 == 0)
2952 return XML_ERROR_DUPLICATE_ATTRIBUTE;
2954 if (!step)
2955 step = PROBE_STEP(uriHash, mask, nsAttsPower);
2956 j < step ? (j += nsAttsSize - step) : (j -= step);
2960 if (ns_triplets) { /* append namespace separator and prefix */
2961 tempPool.ptr[-1] = namespaceSeparator;
2962 s = b->prefix->name;
2963 do {
2964 if (!poolAppendChar(&tempPool, *s))
2965 return XML_ERROR_NO_MEMORY;
2966 } while (*s++);
2969 /* store expanded name in attribute list */
2970 s = poolStart(&tempPool);
2971 poolFinish(&tempPool);
2972 appAtts[i] = s;
2974 /* fill empty slot with new version, uriName and hash value */
2975 nsAtts[j].version = version;
2976 nsAtts[j].hash = uriHash;
2977 nsAtts[j].uriName = s;
2979 if (!--nPrefixes) {
2980 i += 2;
2981 break;
2984 else /* not prefixed */
2985 ((XML_Char *)s)[-1] = 0; /* clear flag */
2988 /* clear flags for the remaining attributes */
2989 for (; i < attIndex; i += 2)
2990 ((XML_Char *)(appAtts[i]))[-1] = 0;
2991 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
2992 binding->attId->name[-1] = 0;
2994 if (!ns)
2995 return XML_ERROR_NONE;
2997 /* expand the element type name */
2998 if (elementType->prefix) {
2999 binding = elementType->prefix->binding;
3000 if (!binding)
3001 return XML_ERROR_UNBOUND_PREFIX;
3002 localPart = tagNamePtr->str;
3003 while (*localPart++ != XML_T(ASCII_COLON))
3006 else if (dtd->defaultPrefix.binding) {
3007 binding = dtd->defaultPrefix.binding;
3008 localPart = tagNamePtr->str;
3010 else
3011 return XML_ERROR_NONE;
3012 prefixLen = 0;
3013 if (ns_triplets && binding->prefix->name) {
3014 for (; binding->prefix->name[prefixLen++];)
3015 ; /* prefixLen includes null terminator */
3017 tagNamePtr->localPart = localPart;
3018 tagNamePtr->uriLen = binding->uriLen;
3019 tagNamePtr->prefix = binding->prefix->name;
3020 tagNamePtr->prefixLen = prefixLen;
3021 for (i = 0; localPart[i++];)
3022 ; /* i includes null terminator */
3023 n = i + binding->uriLen + prefixLen;
3024 if (n > binding->uriAlloc) {
3025 TAG *p;
3026 uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
3027 if (!uri)
3028 return XML_ERROR_NO_MEMORY;
3029 binding->uriAlloc = n + EXPAND_SPARE;
3030 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3031 for (p = tagStack; p; p = p->parent)
3032 if (p->name.str == binding->uri)
3033 p->name.str = uri;
3034 FREE(binding->uri);
3035 binding->uri = uri;
3037 /* if namespaceSeparator != '\0' then uri includes it already */
3038 uri = binding->uri + binding->uriLen;
3039 memcpy(uri, localPart, i * sizeof(XML_Char));
3040 /* we always have a namespace separator between localPart and prefix */
3041 if (prefixLen) {
3042 uri += i - 1;
3043 *uri = namespaceSeparator; /* replace null terminator */
3044 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3046 tagNamePtr->str = binding->uri;
3047 return XML_ERROR_NONE;
3050 /* addBinding() overwrites the value of prefix->binding without checking.
3051 Therefore one must keep track of the old value outside of addBinding().
3053 static enum XML_Error
3054 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3055 const XML_Char *uri, BINDING **bindingsPtr)
3057 static const XML_Char xmlNamespace[] = {
3058 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3059 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3060 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3061 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3062 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3063 ASCII_e, '\0'
3065 static const int xmlLen =
3066 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3067 static const XML_Char xmlnsNamespace[] = {
3068 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3069 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3070 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3071 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3072 ASCII_SLASH, '\0'
3074 static const int xmlnsLen =
3075 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3077 XML_Bool mustBeXML = XML_FALSE;
3078 XML_Bool isXML = XML_TRUE;
3079 XML_Bool isXMLNS = XML_TRUE;
3081 BINDING *b;
3082 int len;
3084 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3085 if (*uri == XML_T('\0') && prefix->name)
3086 return XML_ERROR_UNDECLARING_PREFIX;
3088 if (prefix->name
3089 && prefix->name[0] == XML_T(ASCII_x)
3090 && prefix->name[1] == XML_T(ASCII_m)
3091 && prefix->name[2] == XML_T(ASCII_l)) {
3093 /* Not allowed to bind xmlns */
3094 if (prefix->name[3] == XML_T(ASCII_n)
3095 && prefix->name[4] == XML_T(ASCII_s)
3096 && prefix->name[5] == XML_T('\0'))
3097 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3099 if (prefix->name[3] == XML_T('\0'))
3100 mustBeXML = XML_TRUE;
3103 for (len = 0; uri[len]; len++) {
3104 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3105 isXML = XML_FALSE;
3107 if (!mustBeXML && isXMLNS
3108 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3109 isXMLNS = XML_FALSE;
3111 isXML = isXML && len == xmlLen;
3112 isXMLNS = isXMLNS && len == xmlnsLen;
3114 if (mustBeXML != isXML)
3115 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3116 : XML_ERROR_RESERVED_NAMESPACE_URI;
3118 if (isXMLNS)
3119 return XML_ERROR_RESERVED_NAMESPACE_URI;
3121 if (namespaceSeparator)
3122 len++;
3123 if (freeBindingList) {
3124 b = freeBindingList;
3125 if (len > b->uriAlloc) {
3126 XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3127 sizeof(XML_Char) * (len + EXPAND_SPARE));
3128 if (temp == NULL)
3129 return XML_ERROR_NO_MEMORY;
3130 b->uri = temp;
3131 b->uriAlloc = len + EXPAND_SPARE;
3133 freeBindingList = b->nextTagBinding;
3135 else {
3136 b = (BINDING *)MALLOC(sizeof(BINDING));
3137 if (!b)
3138 return XML_ERROR_NO_MEMORY;
3139 b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
3140 if (!b->uri) {
3141 FREE(b);
3142 return XML_ERROR_NO_MEMORY;
3144 b->uriAlloc = len + EXPAND_SPARE;
3146 b->uriLen = len;
3147 memcpy(b->uri, uri, len * sizeof(XML_Char));
3148 if (namespaceSeparator)
3149 b->uri[len - 1] = namespaceSeparator;
3150 b->prefix = prefix;
3151 b->attId = attId;
3152 b->prevPrefixBinding = prefix->binding;
3153 /* NULL binding when default namespace undeclared */
3154 if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3155 prefix->binding = NULL;
3156 else
3157 prefix->binding = b;
3158 b->nextTagBinding = *bindingsPtr;
3159 *bindingsPtr = b;
3160 /* if attId == NULL then we are not starting a namespace scope */
3161 if (attId && startNamespaceDeclHandler)
3162 startNamespaceDeclHandler(handlerArg, prefix->name,
3163 prefix->binding ? uri : 0);
3164 return XML_ERROR_NONE;
3167 /* The idea here is to avoid using stack for each CDATA section when
3168 the whole file is parsed with one call.
3170 static enum XML_Error PTRCALL
3171 cdataSectionProcessor(XML_Parser parser,
3172 const char *start,
3173 const char *end,
3174 const char **endPtr)
3176 enum XML_Error result = doCdataSection(parser, encoding, &start, end,
3177 endPtr, (XML_Bool)!ps_finalBuffer);
3178 if (result != XML_ERROR_NONE)
3179 return result;
3180 if (start) {
3181 if (parentParser) { /* we are parsing an external entity */
3182 processor = externalEntityContentProcessor;
3183 return externalEntityContentProcessor(parser, start, end, endPtr);
3185 else {
3186 processor = contentProcessor;
3187 return contentProcessor(parser, start, end, endPtr);
3190 return result;
3193 /* startPtr gets set to non-null if the section is closed, and to null if
3194 the section is not yet closed.
3196 static enum XML_Error
3197 doCdataSection(XML_Parser parser,
3198 const ENCODING *enc,
3199 const char **startPtr,
3200 const char *end,
3201 const char **nextPtr,
3202 XML_Bool haveMore)
3204 const char *s = *startPtr;
3205 const char **eventPP;
3206 const char **eventEndPP;
3207 if (enc == encoding) {
3208 eventPP = &eventPtr;
3209 *eventPP = s;
3210 eventEndPP = &eventEndPtr;
3212 else {
3213 eventPP = &(openInternalEntities->internalEventPtr);
3214 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3216 *eventPP = s;
3217 *startPtr = NULL;
3219 for (;;) {
3220 const char *next;
3221 int tok = XmlCdataSectionTok(enc, s, end, &next);
3222 *eventEndPP = next;
3223 switch (tok) {
3224 case XML_TOK_CDATA_SECT_CLOSE:
3225 if (endCdataSectionHandler)
3226 endCdataSectionHandler(handlerArg);
3227 #if 0
3228 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3229 else if (characterDataHandler)
3230 characterDataHandler(handlerArg, dataBuf, 0);
3231 #endif
3232 else if (defaultHandler)
3233 reportDefault(parser, enc, s, next);
3234 *startPtr = next;
3235 *nextPtr = next;
3236 if (ps_parsing == XML_FINISHED)
3237 return XML_ERROR_ABORTED;
3238 else
3239 return XML_ERROR_NONE;
3240 case XML_TOK_DATA_NEWLINE:
3241 if (characterDataHandler) {
3242 XML_Char c = 0xA;
3243 characterDataHandler(handlerArg, &c, 1);
3245 else if (defaultHandler)
3246 reportDefault(parser, enc, s, next);
3247 break;
3248 case XML_TOK_DATA_CHARS:
3250 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3251 if (charDataHandler) {
3252 if (MUST_CONVERT(enc, s)) {
3253 for (;;) {
3254 ICHAR *dataPtr = (ICHAR *)dataBuf;
3255 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
3256 *eventEndPP = next;
3257 charDataHandler(handlerArg, dataBuf,
3258 (int)(dataPtr - (ICHAR *)dataBuf));
3259 if (s == next)
3260 break;
3261 *eventPP = s;
3264 else
3265 charDataHandler(handlerArg,
3266 (XML_Char *)s,
3267 (int)((XML_Char *)next - (XML_Char *)s));
3269 else if (defaultHandler)
3270 reportDefault(parser, enc, s, next);
3272 break;
3273 case XML_TOK_INVALID:
3274 *eventPP = next;
3275 return XML_ERROR_INVALID_TOKEN;
3276 case XML_TOK_PARTIAL_CHAR:
3277 if (haveMore) {
3278 *nextPtr = s;
3279 return XML_ERROR_NONE;
3281 return XML_ERROR_PARTIAL_CHAR;
3282 case XML_TOK_PARTIAL:
3283 case XML_TOK_NONE:
3284 if (haveMore) {
3285 *nextPtr = s;
3286 return XML_ERROR_NONE;
3288 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3289 default:
3290 *eventPP = next;
3291 return XML_ERROR_UNEXPECTED_STATE;
3294 *eventPP = s = next;
3295 switch (ps_parsing) {
3296 case XML_SUSPENDED:
3297 *nextPtr = next;
3298 return XML_ERROR_NONE;
3299 case XML_FINISHED:
3300 return XML_ERROR_ABORTED;
3301 default: ;
3304 /* not reached */
3307 #ifdef XML_DTD
3309 /* The idea here is to avoid using stack for each IGNORE section when
3310 the whole file is parsed with one call.
3312 static enum XML_Error PTRCALL
3313 ignoreSectionProcessor(XML_Parser parser,
3314 const char *start,
3315 const char *end,
3316 const char **endPtr)
3318 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
3319 endPtr, (XML_Bool)!ps_finalBuffer);
3320 if (result != XML_ERROR_NONE)
3321 return result;
3322 if (start) {
3323 processor = prologProcessor;
3324 return prologProcessor(parser, start, end, endPtr);
3326 return result;
3329 /* startPtr gets set to non-null is the section is closed, and to null
3330 if the section is not yet closed.
3332 static enum XML_Error
3333 doIgnoreSection(XML_Parser parser,
3334 const ENCODING *enc,
3335 const char **startPtr,
3336 const char *end,
3337 const char **nextPtr,
3338 XML_Bool haveMore)
3340 const char *next;
3341 int tok;
3342 const char *s = *startPtr;
3343 const char **eventPP;
3344 const char **eventEndPP;
3345 if (enc == encoding) {
3346 eventPP = &eventPtr;
3347 *eventPP = s;
3348 eventEndPP = &eventEndPtr;
3350 else {
3351 eventPP = &(openInternalEntities->internalEventPtr);
3352 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3354 *eventPP = s;
3355 *startPtr = NULL;
3356 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3357 *eventEndPP = next;
3358 switch (tok) {
3359 case XML_TOK_IGNORE_SECT:
3360 if (defaultHandler)
3361 reportDefault(parser, enc, s, next);
3362 *startPtr = next;
3363 *nextPtr = next;
3364 if (ps_parsing == XML_FINISHED)
3365 return XML_ERROR_ABORTED;
3366 else
3367 return XML_ERROR_NONE;
3368 case XML_TOK_INVALID:
3369 *eventPP = next;
3370 return XML_ERROR_INVALID_TOKEN;
3371 case XML_TOK_PARTIAL_CHAR:
3372 if (haveMore) {
3373 *nextPtr = s;
3374 return XML_ERROR_NONE;
3376 return XML_ERROR_PARTIAL_CHAR;
3377 case XML_TOK_PARTIAL:
3378 case XML_TOK_NONE:
3379 if (haveMore) {
3380 *nextPtr = s;
3381 return XML_ERROR_NONE;
3383 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3384 default:
3385 *eventPP = next;
3386 return XML_ERROR_UNEXPECTED_STATE;
3388 /* not reached */
3391 #endif /* XML_DTD */
3393 static enum XML_Error
3394 initializeEncoding(XML_Parser parser)
3396 const char *s;
3397 #ifdef XML_UNICODE
3398 char encodingBuf[128];
3399 if (!protocolEncodingName)
3400 s = NULL;
3401 else {
3402 int i;
3403 for (i = 0; protocolEncodingName[i]; i++) {
3404 if (i == sizeof(encodingBuf) - 1
3405 || (protocolEncodingName[i] & ~0x7f) != 0) {
3406 encodingBuf[0] = '\0';
3407 break;
3409 encodingBuf[i] = (char)protocolEncodingName[i];
3411 encodingBuf[i] = '\0';
3412 s = encodingBuf;
3414 #else
3415 s = protocolEncodingName;
3416 #endif
3417 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
3418 return XML_ERROR_NONE;
3419 return handleUnknownEncoding(parser, protocolEncodingName);
3422 static enum XML_Error
3423 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
3424 const char *s, const char *next)
3426 const char *encodingName = NULL;
3427 const XML_Char *storedEncName = NULL;
3428 const ENCODING *newEncoding = NULL;
3429 const char *version = NULL;
3430 const char *versionend;
3431 const XML_Char *storedversion = NULL;
3432 int standalone = -1;
3433 if (!(ns
3434 ? XmlParseXmlDeclNS
3435 : XmlParseXmlDecl)(isGeneralTextEntity,
3436 encoding,
3438 next,
3439 &eventPtr,
3440 &version,
3441 &versionend,
3442 &encodingName,
3443 &newEncoding,
3444 &standalone)) {
3445 if (isGeneralTextEntity)
3446 return XML_ERROR_TEXT_DECL;
3447 else
3448 return XML_ERROR_XML_DECL;
3450 if (!isGeneralTextEntity && standalone == 1) {
3451 _dtd->standalone = XML_TRUE;
3452 #ifdef XML_DTD
3453 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3454 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3455 #endif /* XML_DTD */
3457 if (xmlDeclHandler) {
3458 if (encodingName != NULL) {
3459 storedEncName = poolStoreString(&temp2Pool,
3460 encoding,
3461 encodingName,
3462 encodingName
3463 + XmlNameLength(encoding, encodingName));
3464 if (!storedEncName)
3465 return XML_ERROR_NO_MEMORY;
3466 poolFinish(&temp2Pool);
3468 if (version) {
3469 storedversion = poolStoreString(&temp2Pool,
3470 encoding,
3471 version,
3472 versionend - encoding->minBytesPerChar);
3473 if (!storedversion)
3474 return XML_ERROR_NO_MEMORY;
3476 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
3478 else if (defaultHandler)
3479 reportDefault(parser, encoding, s, next);
3480 if (protocolEncodingName == NULL) {
3481 if (newEncoding) {
3482 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
3483 eventPtr = encodingName;
3484 return XML_ERROR_INCORRECT_ENCODING;
3486 encoding = newEncoding;
3488 else if (encodingName) {
3489 enum XML_Error result;
3490 if (!storedEncName) {
3491 storedEncName = poolStoreString(
3492 &temp2Pool, encoding, encodingName,
3493 encodingName + XmlNameLength(encoding, encodingName));
3494 if (!storedEncName)
3495 return XML_ERROR_NO_MEMORY;
3497 result = handleUnknownEncoding(parser, storedEncName);
3498 poolClear(&temp2Pool);
3499 if (result == XML_ERROR_UNKNOWN_ENCODING)
3500 eventPtr = encodingName;
3501 return result;
3505 if (storedEncName || storedversion)
3506 poolClear(&temp2Pool);
3508 return XML_ERROR_NONE;
3511 static enum XML_Error
3512 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
3514 if (unknownEncodingHandler) {
3515 XML_Encoding info;
3516 int i;
3517 for (i = 0; i < 256; i++)
3518 info.map[i] = -1;
3519 info.convert = NULL;
3520 info.data = NULL;
3521 info.release = NULL;
3522 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
3523 &info)) {
3524 ENCODING *enc;
3525 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
3526 if (!unknownEncodingMem) {
3527 if (info.release)
3528 info.release(info.data);
3529 return XML_ERROR_NO_MEMORY;
3531 enc = (ns
3532 ? XmlInitUnknownEncodingNS
3533 : XmlInitUnknownEncoding)(unknownEncodingMem,
3534 info.map,
3535 info.convert,
3536 info.data);
3537 if (enc) {
3538 unknownEncodingData = info.data;
3539 unknownEncodingRelease = info.release;
3540 encoding = enc;
3541 return XML_ERROR_NONE;
3544 if (info.release != NULL)
3545 info.release(info.data);
3547 return XML_ERROR_UNKNOWN_ENCODING;
3550 static enum XML_Error PTRCALL
3551 prologInitProcessor(XML_Parser parser,
3552 const char *s,
3553 const char *end,
3554 const char **nextPtr)
3556 enum XML_Error result = initializeEncoding(parser);
3557 if (result != XML_ERROR_NONE)
3558 return result;
3559 processor = prologProcessor;
3560 return prologProcessor(parser, s, end, nextPtr);
3563 #ifdef XML_DTD
3565 static enum XML_Error PTRCALL
3566 externalParEntInitProcessor(XML_Parser parser,
3567 const char *s,
3568 const char *end,
3569 const char **nextPtr)
3571 enum XML_Error result = initializeEncoding(parser);
3572 if (result != XML_ERROR_NONE)
3573 return result;
3575 /* we know now that XML_Parse(Buffer) has been called,
3576 so we consider the external parameter entity read */
3577 _dtd->paramEntityRead = XML_TRUE;
3579 if (prologState.inEntityValue) {
3580 processor = entityValueInitProcessor;
3581 return entityValueInitProcessor(parser, s, end, nextPtr);
3583 else {
3584 processor = externalParEntProcessor;
3585 return externalParEntProcessor(parser, s, end, nextPtr);
3589 static enum XML_Error PTRCALL
3590 entityValueInitProcessor(XML_Parser parser,
3591 const char *s,
3592 const char *end,
3593 const char **nextPtr)
3595 int tok;
3596 const char *start = s;
3597 const char *next = start;
3598 eventPtr = start;
3600 for (;;) {
3601 tok = XmlPrologTok(encoding, start, end, &next);
3602 eventEndPtr = next;
3603 if (tok <= 0) {
3604 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
3605 *nextPtr = s;
3606 return XML_ERROR_NONE;
3608 switch (tok) {
3609 case XML_TOK_INVALID:
3610 return XML_ERROR_INVALID_TOKEN;
3611 case XML_TOK_PARTIAL:
3612 return XML_ERROR_UNCLOSED_TOKEN;
3613 case XML_TOK_PARTIAL_CHAR:
3614 return XML_ERROR_PARTIAL_CHAR;
3615 case XML_TOK_NONE: /* start == end */
3616 default:
3617 break;
3619 /* found end of entity value - can store it now */
3620 return storeEntityValue(parser, encoding, s, end);
3622 else if (tok == XML_TOK_XML_DECL) {
3623 enum XML_Error result;
3624 result = processXmlDecl(parser, 0, start, next);
3625 if (result != XML_ERROR_NONE)
3626 return result;
3627 switch (ps_parsing) {
3628 case XML_SUSPENDED:
3629 *nextPtr = next;
3630 return XML_ERROR_NONE;
3631 case XML_FINISHED:
3632 return XML_ERROR_ABORTED;
3633 default:
3634 *nextPtr = next;
3636 /* stop scanning for text declaration - we found one */
3637 processor = entityValueProcessor;
3638 return entityValueProcessor(parser, next, end, nextPtr);
3640 /* If we are at the end of the buffer, this would cause XmlPrologTok to
3641 return XML_TOK_NONE on the next call, which would then cause the
3642 function to exit with *nextPtr set to s - that is what we want for other
3643 tokens, but not for the BOM - we would rather like to skip it;
3644 then, when this routine is entered the next time, XmlPrologTok will
3645 return XML_TOK_INVALID, since the BOM is still in the buffer
3647 else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
3648 *nextPtr = next;
3649 return XML_ERROR_NONE;
3651 start = next;
3652 eventPtr = start;
3656 static enum XML_Error PTRCALL
3657 externalParEntProcessor(XML_Parser parser,
3658 const char *s,
3659 const char *end,
3660 const char **nextPtr)
3662 const char *next = s;
3663 int tok;
3665 tok = XmlPrologTok(encoding, s, end, &next);
3666 if (tok <= 0) {
3667 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
3668 *nextPtr = s;
3669 return XML_ERROR_NONE;
3671 switch (tok) {
3672 case XML_TOK_INVALID:
3673 return XML_ERROR_INVALID_TOKEN;
3674 case XML_TOK_PARTIAL:
3675 return XML_ERROR_UNCLOSED_TOKEN;
3676 case XML_TOK_PARTIAL_CHAR:
3677 return XML_ERROR_PARTIAL_CHAR;
3678 case XML_TOK_NONE: /* start == end */
3679 default:
3680 break;
3683 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
3684 However, when parsing an external subset, doProlog will not accept a BOM
3685 as valid, and report a syntax error, so we have to skip the BOM
3687 else if (tok == XML_TOK_BOM) {
3688 s = next;
3689 tok = XmlPrologTok(encoding, s, end, &next);
3692 processor = prologProcessor;
3693 return doProlog(parser, encoding, s, end, tok, next,
3694 nextPtr, (XML_Bool)!ps_finalBuffer);
3697 static enum XML_Error PTRCALL
3698 entityValueProcessor(XML_Parser parser,
3699 const char *s,
3700 const char *end,
3701 const char **nextPtr)
3703 const char *start = s;
3704 const char *next = s;
3705 const ENCODING *enc = encoding;
3706 int tok;
3708 for (;;) {
3709 tok = XmlPrologTok(enc, start, end, &next);
3710 if (tok <= 0) {
3711 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
3712 *nextPtr = s;
3713 return XML_ERROR_NONE;
3715 switch (tok) {
3716 case XML_TOK_INVALID:
3717 return XML_ERROR_INVALID_TOKEN;
3718 case XML_TOK_PARTIAL:
3719 return XML_ERROR_UNCLOSED_TOKEN;
3720 case XML_TOK_PARTIAL_CHAR:
3721 return XML_ERROR_PARTIAL_CHAR;
3722 case XML_TOK_NONE: /* start == end */
3723 default:
3724 break;
3726 /* found end of entity value - can store it now */
3727 return storeEntityValue(parser, enc, s, end);
3729 start = next;
3733 #endif /* XML_DTD */
3735 static enum XML_Error PTRCALL
3736 prologProcessor(XML_Parser parser,
3737 const char *s,
3738 const char *end,
3739 const char **nextPtr)
3741 const char *next = s;
3742 int tok = XmlPrologTok(encoding, s, end, &next);
3743 return doProlog(parser, encoding, s, end, tok, next,
3744 nextPtr, (XML_Bool)!ps_finalBuffer);
3747 static enum XML_Error
3748 doProlog(XML_Parser parser,
3749 const ENCODING *enc,
3750 const char *s,
3751 const char *end,
3752 int tok,
3753 const char *next,
3754 const char **nextPtr,
3755 XML_Bool haveMore)
3757 #ifdef XML_DTD
3758 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
3759 #endif /* XML_DTD */
3760 static const XML_Char atypeCDATA[] =
3761 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
3762 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
3763 static const XML_Char atypeIDREF[] =
3764 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
3765 static const XML_Char atypeIDREFS[] =
3766 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
3767 static const XML_Char atypeENTITY[] =
3768 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
3769 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
3770 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
3771 static const XML_Char atypeNMTOKEN[] = {
3772 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
3773 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
3774 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
3775 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
3776 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
3777 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
3778 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
3780 /* save one level of indirection */
3781 DTD * const dtd = _dtd;
3783 const char **eventPP;
3784 const char **eventEndPP;
3785 enum XML_Content_Quant quant;
3787 if (enc == encoding) {
3788 eventPP = &eventPtr;
3789 eventEndPP = &eventEndPtr;
3791 else {
3792 eventPP = &(openInternalEntities->internalEventPtr);
3793 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3796 for (;;) {
3797 int role;
3798 XML_Bool handleDefault = XML_TRUE;
3799 *eventPP = s;
3800 *eventEndPP = next;
3801 if (tok <= 0) {
3802 if (haveMore && tok != XML_TOK_INVALID) {
3803 *nextPtr = s;
3804 return XML_ERROR_NONE;
3806 switch (tok) {
3807 case XML_TOK_INVALID:
3808 *eventPP = next;
3809 return XML_ERROR_INVALID_TOKEN;
3810 case XML_TOK_PARTIAL:
3811 return XML_ERROR_UNCLOSED_TOKEN;
3812 case XML_TOK_PARTIAL_CHAR:
3813 return XML_ERROR_PARTIAL_CHAR;
3814 case -XML_TOK_PROLOG_S:
3815 tok = -tok;
3816 break;
3817 case XML_TOK_NONE:
3818 #ifdef XML_DTD
3819 /* for internal PE NOT referenced between declarations */
3820 if (enc != encoding && !openInternalEntities->betweenDecl) {
3821 *nextPtr = s;
3822 return XML_ERROR_NONE;
3824 /* WFC: PE Between Declarations - must check that PE contains
3825 complete markup, not only for external PEs, but also for
3826 internal PEs if the reference occurs between declarations.
3828 if (isParamEntity || enc != encoding) {
3829 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
3830 == XML_ROLE_ERROR)
3831 return XML_ERROR_INCOMPLETE_PE;
3832 *nextPtr = s;
3833 return XML_ERROR_NONE;
3835 #endif /* XML_DTD */
3836 return XML_ERROR_NO_ELEMENTS;
3837 default:
3838 tok = -tok;
3839 next = end;
3840 break;
3843 role = XmlTokenRole(&prologState, tok, s, next, enc);
3844 switch (role) {
3845 case XML_ROLE_XML_DECL:
3847 enum XML_Error result = processXmlDecl(parser, 0, s, next);
3848 if (result != XML_ERROR_NONE)
3849 return result;
3850 enc = encoding;
3851 handleDefault = XML_FALSE;
3853 break;
3854 case XML_ROLE_DOCTYPE_NAME:
3855 if (startDoctypeDeclHandler) {
3856 doctypeName = poolStoreString(&tempPool, enc, s, next);
3857 if (!doctypeName)
3858 return XML_ERROR_NO_MEMORY;
3859 poolFinish(&tempPool);
3860 doctypePubid = NULL;
3861 handleDefault = XML_FALSE;
3863 doctypeSysid = NULL; /* always initialize to NULL */
3864 break;
3865 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
3866 if (startDoctypeDeclHandler) {
3867 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
3868 doctypePubid, 1);
3869 doctypeName = NULL;
3870 poolClear(&tempPool);
3871 handleDefault = XML_FALSE;
3873 break;
3874 #ifdef XML_DTD
3875 case XML_ROLE_TEXT_DECL:
3877 enum XML_Error result = processXmlDecl(parser, 1, s, next);
3878 if (result != XML_ERROR_NONE)
3879 return result;
3880 enc = encoding;
3881 handleDefault = XML_FALSE;
3883 break;
3884 #endif /* XML_DTD */
3885 case XML_ROLE_DOCTYPE_PUBLIC_ID:
3886 #ifdef XML_DTD
3887 useForeignDTD = XML_FALSE;
3888 declEntity = (ENTITY *)lookup(parser,
3889 &dtd->paramEntities,
3890 externalSubsetName,
3891 sizeof(ENTITY));
3892 if (!declEntity)
3893 return XML_ERROR_NO_MEMORY;
3894 #endif /* XML_DTD */
3895 dtd->hasParamEntityRefs = XML_TRUE;
3896 if (startDoctypeDeclHandler) {
3897 XML_Char *pubId;
3898 if (!XmlIsPublicId(enc, s, next, eventPP))
3899 return XML_ERROR_PUBLICID;
3900 pubId = poolStoreString(&tempPool, enc,
3901 s + enc->minBytesPerChar,
3902 next - enc->minBytesPerChar);
3903 if (!pubId)
3904 return XML_ERROR_NO_MEMORY;
3905 normalizePublicId(pubId);
3906 poolFinish(&tempPool);
3907 doctypePubid = pubId;
3908 handleDefault = XML_FALSE;
3909 goto alreadyChecked;
3911 /* fall through */
3912 case XML_ROLE_ENTITY_PUBLIC_ID:
3913 if (!XmlIsPublicId(enc, s, next, eventPP))
3914 return XML_ERROR_PUBLICID;
3915 alreadyChecked:
3916 if (dtd->keepProcessing && declEntity) {
3917 XML_Char *tem = poolStoreString(&dtd->pool,
3918 enc,
3919 s + enc->minBytesPerChar,
3920 next - enc->minBytesPerChar);
3921 if (!tem)
3922 return XML_ERROR_NO_MEMORY;
3923 normalizePublicId(tem);
3924 declEntity->publicId = tem;
3925 poolFinish(&dtd->pool);
3926 if (entityDeclHandler)
3927 handleDefault = XML_FALSE;
3929 break;
3930 case XML_ROLE_DOCTYPE_CLOSE:
3931 if (doctypeName) {
3932 startDoctypeDeclHandler(handlerArg, doctypeName,
3933 doctypeSysid, doctypePubid, 0);
3934 poolClear(&tempPool);
3935 handleDefault = XML_FALSE;
3937 /* doctypeSysid will be non-NULL in the case of a previous
3938 XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
3939 was not set, indicating an external subset
3941 #ifdef XML_DTD
3942 if (doctypeSysid || useForeignDTD) {
3943 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
3944 dtd->hasParamEntityRefs = XML_TRUE;
3945 if (paramEntityParsing && externalEntityRefHandler) {
3946 ENTITY *entity = (ENTITY *)lookup(parser,
3947 &dtd->paramEntities,
3948 externalSubsetName,
3949 sizeof(ENTITY));
3950 if (!entity)
3951 return XML_ERROR_NO_MEMORY;
3952 if (useForeignDTD)
3953 entity->base = curBase;
3954 dtd->paramEntityRead = XML_FALSE;
3955 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
3957 entity->base,
3958 entity->systemId,
3959 entity->publicId))
3960 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3961 if (dtd->paramEntityRead) {
3962 if (!dtd->standalone &&
3963 notStandaloneHandler &&
3964 !notStandaloneHandler(handlerArg))
3965 return XML_ERROR_NOT_STANDALONE;
3967 /* if we didn't read the foreign DTD then this means that there
3968 is no external subset and we must reset dtd->hasParamEntityRefs
3970 else if (!doctypeSysid)
3971 dtd->hasParamEntityRefs = hadParamEntityRefs;
3972 /* end of DTD - no need to update dtd->keepProcessing */
3974 useForeignDTD = XML_FALSE;
3976 #endif /* XML_DTD */
3977 if (endDoctypeDeclHandler) {
3978 endDoctypeDeclHandler(handlerArg);
3979 handleDefault = XML_FALSE;
3981 break;
3982 case XML_ROLE_INSTANCE_START:
3983 #ifdef XML_DTD
3984 /* if there is no DOCTYPE declaration then now is the
3985 last chance to read the foreign DTD
3987 if (useForeignDTD) {
3988 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
3989 dtd->hasParamEntityRefs = XML_TRUE;
3990 if (paramEntityParsing && externalEntityRefHandler) {
3991 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
3992 externalSubsetName,
3993 sizeof(ENTITY));
3994 if (!entity)
3995 return XML_ERROR_NO_MEMORY;
3996 entity->base = curBase;
3997 dtd->paramEntityRead = XML_FALSE;
3998 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4000 entity->base,
4001 entity->systemId,
4002 entity->publicId))
4003 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4004 if (dtd->paramEntityRead) {
4005 if (!dtd->standalone &&
4006 notStandaloneHandler &&
4007 !notStandaloneHandler(handlerArg))
4008 return XML_ERROR_NOT_STANDALONE;
4010 /* if we didn't read the foreign DTD then this means that there
4011 is no external subset and we must reset dtd->hasParamEntityRefs
4013 else
4014 dtd->hasParamEntityRefs = hadParamEntityRefs;
4015 /* end of DTD - no need to update dtd->keepProcessing */
4018 #endif /* XML_DTD */
4019 processor = contentProcessor;
4020 return contentProcessor(parser, s, end, nextPtr);
4021 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4022 declElementType = getElementType(parser, enc, s, next);
4023 if (!declElementType)
4024 return XML_ERROR_NO_MEMORY;
4025 goto checkAttListDeclHandler;
4026 case XML_ROLE_ATTRIBUTE_NAME:
4027 declAttributeId = getAttributeId(parser, enc, s, next);
4028 if (!declAttributeId)
4029 return XML_ERROR_NO_MEMORY;
4030 declAttributeIsCdata = XML_FALSE;
4031 declAttributeType = NULL;
4032 declAttributeIsId = XML_FALSE;
4033 goto checkAttListDeclHandler;
4034 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4035 declAttributeIsCdata = XML_TRUE;
4036 declAttributeType = atypeCDATA;
4037 goto checkAttListDeclHandler;
4038 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4039 declAttributeIsId = XML_TRUE;
4040 declAttributeType = atypeID;
4041 goto checkAttListDeclHandler;
4042 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4043 declAttributeType = atypeIDREF;
4044 goto checkAttListDeclHandler;
4045 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4046 declAttributeType = atypeIDREFS;
4047 goto checkAttListDeclHandler;
4048 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4049 declAttributeType = atypeENTITY;
4050 goto checkAttListDeclHandler;
4051 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4052 declAttributeType = atypeENTITIES;
4053 goto checkAttListDeclHandler;
4054 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4055 declAttributeType = atypeNMTOKEN;
4056 goto checkAttListDeclHandler;
4057 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4058 declAttributeType = atypeNMTOKENS;
4059 checkAttListDeclHandler:
4060 if (dtd->keepProcessing && attlistDeclHandler)
4061 handleDefault = XML_FALSE;
4062 break;
4063 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4064 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4065 if (dtd->keepProcessing && attlistDeclHandler) {
4066 const XML_Char *prefix;
4067 if (declAttributeType) {
4068 prefix = enumValueSep;
4070 else {
4071 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4072 ? notationPrefix
4073 : enumValueStart);
4075 if (!poolAppendString(&tempPool, prefix))
4076 return XML_ERROR_NO_MEMORY;
4077 if (!poolAppend(&tempPool, enc, s, next))
4078 return XML_ERROR_NO_MEMORY;
4079 declAttributeType = tempPool.start;
4080 handleDefault = XML_FALSE;
4082 break;
4083 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4084 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4085 if (dtd->keepProcessing) {
4086 if (!defineAttribute(declElementType, declAttributeId,
4087 declAttributeIsCdata, declAttributeIsId,
4088 0, parser))
4089 return XML_ERROR_NO_MEMORY;
4090 if (attlistDeclHandler && declAttributeType) {
4091 if (*declAttributeType == XML_T(ASCII_LPAREN)
4092 || (*declAttributeType == XML_T(ASCII_N)
4093 && declAttributeType[1] == XML_T(ASCII_O))) {
4094 /* Enumerated or Notation type */
4095 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
4096 || !poolAppendChar(&tempPool, XML_T('\0')))
4097 return XML_ERROR_NO_MEMORY;
4098 declAttributeType = tempPool.start;
4099 poolFinish(&tempPool);
4101 *eventEndPP = s;
4102 attlistDeclHandler(handlerArg, declElementType->name,
4103 declAttributeId->name, declAttributeType,
4104 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4105 poolClear(&tempPool);
4106 handleDefault = XML_FALSE;
4109 break;
4110 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4111 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4112 if (dtd->keepProcessing) {
4113 const XML_Char *attVal;
4114 enum XML_Error result =
4115 storeAttributeValue(parser, enc, declAttributeIsCdata,
4116 s + enc->minBytesPerChar,
4117 next - enc->minBytesPerChar,
4118 &dtd->pool);
4119 if (result)
4120 return result;
4121 attVal = poolStart(&dtd->pool);
4122 poolFinish(&dtd->pool);
4123 /* ID attributes aren't allowed to have a default */
4124 if (!defineAttribute(declElementType, declAttributeId,
4125 declAttributeIsCdata, XML_FALSE, attVal, parser))
4126 return XML_ERROR_NO_MEMORY;
4127 if (attlistDeclHandler && declAttributeType) {
4128 if (*declAttributeType == XML_T(ASCII_LPAREN)
4129 || (*declAttributeType == XML_T(ASCII_N)
4130 && declAttributeType[1] == XML_T(ASCII_O))) {
4131 /* Enumerated or Notation type */
4132 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
4133 || !poolAppendChar(&tempPool, XML_T('\0')))
4134 return XML_ERROR_NO_MEMORY;
4135 declAttributeType = tempPool.start;
4136 poolFinish(&tempPool);
4138 *eventEndPP = s;
4139 attlistDeclHandler(handlerArg, declElementType->name,
4140 declAttributeId->name, declAttributeType,
4141 attVal,
4142 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4143 poolClear(&tempPool);
4144 handleDefault = XML_FALSE;
4147 break;
4148 case XML_ROLE_ENTITY_VALUE:
4149 if (dtd->keepProcessing) {
4150 enum XML_Error result = storeEntityValue(parser, enc,
4151 s + enc->minBytesPerChar,
4152 next - enc->minBytesPerChar);
4153 if (declEntity) {
4154 declEntity->textPtr = poolStart(&dtd->entityValuePool);
4155 declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
4156 poolFinish(&dtd->entityValuePool);
4157 if (entityDeclHandler) {
4158 *eventEndPP = s;
4159 entityDeclHandler(handlerArg,
4160 declEntity->name,
4161 declEntity->is_param,
4162 declEntity->textPtr,
4163 declEntity->textLen,
4164 curBase, 0, 0, 0);
4165 handleDefault = XML_FALSE;
4168 else
4169 poolDiscard(&dtd->entityValuePool);
4170 if (result != XML_ERROR_NONE)
4171 return result;
4173 break;
4174 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4175 #ifdef XML_DTD
4176 useForeignDTD = XML_FALSE;
4177 #endif /* XML_DTD */
4178 dtd->hasParamEntityRefs = XML_TRUE;
4179 if (startDoctypeDeclHandler) {
4180 doctypeSysid = poolStoreString(&tempPool, enc,
4181 s + enc->minBytesPerChar,
4182 next - enc->minBytesPerChar);
4183 if (doctypeSysid == NULL)
4184 return XML_ERROR_NO_MEMORY;
4185 poolFinish(&tempPool);
4186 handleDefault = XML_FALSE;
4188 #ifdef XML_DTD
4189 else
4190 /* use externalSubsetName to make doctypeSysid non-NULL
4191 for the case where no startDoctypeDeclHandler is set */
4192 doctypeSysid = externalSubsetName;
4193 #endif /* XML_DTD */
4194 if (!dtd->standalone
4195 #ifdef XML_DTD
4196 && !paramEntityParsing
4197 #endif /* XML_DTD */
4198 && notStandaloneHandler
4199 && !notStandaloneHandler(handlerArg))
4200 return XML_ERROR_NOT_STANDALONE;
4201 #ifndef XML_DTD
4202 break;
4203 #else /* XML_DTD */
4204 if (!declEntity) {
4205 declEntity = (ENTITY *)lookup(parser,
4206 &dtd->paramEntities,
4207 externalSubsetName,
4208 sizeof(ENTITY));
4209 if (!declEntity)
4210 return XML_ERROR_NO_MEMORY;
4211 declEntity->publicId = NULL;
4213 /* fall through */
4214 #endif /* XML_DTD */
4215 case XML_ROLE_ENTITY_SYSTEM_ID:
4216 if (dtd->keepProcessing && declEntity) {
4217 declEntity->systemId = poolStoreString(&dtd->pool, enc,
4218 s + enc->minBytesPerChar,
4219 next - enc->minBytesPerChar);
4220 if (!declEntity->systemId)
4221 return XML_ERROR_NO_MEMORY;
4222 declEntity->base = curBase;
4223 poolFinish(&dtd->pool);
4224 if (entityDeclHandler)
4225 handleDefault = XML_FALSE;
4227 break;
4228 case XML_ROLE_ENTITY_COMPLETE:
4229 if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4230 *eventEndPP = s;
4231 entityDeclHandler(handlerArg,
4232 declEntity->name,
4233 declEntity->is_param,
4234 0,0,
4235 declEntity->base,
4236 declEntity->systemId,
4237 declEntity->publicId,
4239 handleDefault = XML_FALSE;
4241 break;
4242 case XML_ROLE_ENTITY_NOTATION_NAME:
4243 if (dtd->keepProcessing && declEntity) {
4244 declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4245 if (!declEntity->notation)
4246 return XML_ERROR_NO_MEMORY;
4247 poolFinish(&dtd->pool);
4248 if (unparsedEntityDeclHandler) {
4249 *eventEndPP = s;
4250 unparsedEntityDeclHandler(handlerArg,
4251 declEntity->name,
4252 declEntity->base,
4253 declEntity->systemId,
4254 declEntity->publicId,
4255 declEntity->notation);
4256 handleDefault = XML_FALSE;
4258 else if (entityDeclHandler) {
4259 *eventEndPP = s;
4260 entityDeclHandler(handlerArg,
4261 declEntity->name,
4262 0,0,0,
4263 declEntity->base,
4264 declEntity->systemId,
4265 declEntity->publicId,
4266 declEntity->notation);
4267 handleDefault = XML_FALSE;
4270 break;
4271 case XML_ROLE_GENERAL_ENTITY_NAME:
4273 if (XmlPredefinedEntityName(enc, s, next)) {
4274 declEntity = NULL;
4275 break;
4277 if (dtd->keepProcessing) {
4278 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4279 if (!name)
4280 return XML_ERROR_NO_MEMORY;
4281 declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
4282 sizeof(ENTITY));
4283 if (!declEntity)
4284 return XML_ERROR_NO_MEMORY;
4285 if (declEntity->name != name) {
4286 poolDiscard(&dtd->pool);
4287 declEntity = NULL;
4289 else {
4290 poolFinish(&dtd->pool);
4291 declEntity->publicId = NULL;
4292 declEntity->is_param = XML_FALSE;
4293 /* if we have a parent parser or are reading an internal parameter
4294 entity, then the entity declaration is not considered "internal"
4296 declEntity->is_internal = !(parentParser || openInternalEntities);
4297 if (entityDeclHandler)
4298 handleDefault = XML_FALSE;
4301 else {
4302 poolDiscard(&dtd->pool);
4303 declEntity = NULL;
4306 break;
4307 case XML_ROLE_PARAM_ENTITY_NAME:
4308 #ifdef XML_DTD
4309 if (dtd->keepProcessing) {
4310 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4311 if (!name)
4312 return XML_ERROR_NO_MEMORY;
4313 declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4314 name, sizeof(ENTITY));
4315 if (!declEntity)
4316 return XML_ERROR_NO_MEMORY;
4317 if (declEntity->name != name) {
4318 poolDiscard(&dtd->pool);
4319 declEntity = NULL;
4321 else {
4322 poolFinish(&dtd->pool);
4323 declEntity->publicId = NULL;
4324 declEntity->is_param = XML_TRUE;
4325 /* if we have a parent parser or are reading an internal parameter
4326 entity, then the entity declaration is not considered "internal"
4328 declEntity->is_internal = !(parentParser || openInternalEntities);
4329 if (entityDeclHandler)
4330 handleDefault = XML_FALSE;
4333 else {
4334 poolDiscard(&dtd->pool);
4335 declEntity = NULL;
4337 #else /* not XML_DTD */
4338 declEntity = NULL;
4339 #endif /* XML_DTD */
4340 break;
4341 case XML_ROLE_NOTATION_NAME:
4342 declNotationPublicId = NULL;
4343 declNotationName = NULL;
4344 if (notationDeclHandler) {
4345 declNotationName = poolStoreString(&tempPool, enc, s, next);
4346 if (!declNotationName)
4347 return XML_ERROR_NO_MEMORY;
4348 poolFinish(&tempPool);
4349 handleDefault = XML_FALSE;
4351 break;
4352 case XML_ROLE_NOTATION_PUBLIC_ID:
4353 if (!XmlIsPublicId(enc, s, next, eventPP))
4354 return XML_ERROR_PUBLICID;
4355 if (declNotationName) { /* means notationDeclHandler != NULL */
4356 XML_Char *tem = poolStoreString(&tempPool,
4357 enc,
4358 s + enc->minBytesPerChar,
4359 next - enc->minBytesPerChar);
4360 if (!tem)
4361 return XML_ERROR_NO_MEMORY;
4362 normalizePublicId(tem);
4363 declNotationPublicId = tem;
4364 poolFinish(&tempPool);
4365 handleDefault = XML_FALSE;
4367 break;
4368 case XML_ROLE_NOTATION_SYSTEM_ID:
4369 if (declNotationName && notationDeclHandler) {
4370 const XML_Char *systemId
4371 = poolStoreString(&tempPool, enc,
4372 s + enc->minBytesPerChar,
4373 next - enc->minBytesPerChar);
4374 if (!systemId)
4375 return XML_ERROR_NO_MEMORY;
4376 *eventEndPP = s;
4377 notationDeclHandler(handlerArg,
4378 declNotationName,
4379 curBase,
4380 systemId,
4381 declNotationPublicId);
4382 handleDefault = XML_FALSE;
4384 poolClear(&tempPool);
4385 break;
4386 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4387 if (declNotationPublicId && notationDeclHandler) {
4388 *eventEndPP = s;
4389 notationDeclHandler(handlerArg,
4390 declNotationName,
4391 curBase,
4393 declNotationPublicId);
4394 handleDefault = XML_FALSE;
4396 poolClear(&tempPool);
4397 break;
4398 case XML_ROLE_ERROR:
4399 switch (tok) {
4400 case XML_TOK_PARAM_ENTITY_REF:
4401 /* PE references in internal subset are
4402 not allowed within declarations. */
4403 return XML_ERROR_PARAM_ENTITY_REF;
4404 case XML_TOK_XML_DECL:
4405 return XML_ERROR_MISPLACED_XML_PI;
4406 default:
4407 return XML_ERROR_SYNTAX;
4409 #ifdef XML_DTD
4410 case XML_ROLE_IGNORE_SECT:
4412 enum XML_Error result;
4413 if (defaultHandler)
4414 reportDefault(parser, enc, s, next);
4415 handleDefault = XML_FALSE;
4416 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4417 if (result != XML_ERROR_NONE)
4418 return result;
4419 else if (!next) {
4420 processor = ignoreSectionProcessor;
4421 return result;
4424 break;
4425 #endif /* XML_DTD */
4426 case XML_ROLE_GROUP_OPEN:
4427 if (prologState.level >= groupSize) {
4428 if (groupSize) {
4429 char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
4430 if (temp == NULL)
4431 return XML_ERROR_NO_MEMORY;
4432 groupConnector = temp;
4433 if (dtd->scaffIndex) {
4434 int *temp = (int *)REALLOC(dtd->scaffIndex,
4435 groupSize * sizeof(int));
4436 if (temp == NULL)
4437 return XML_ERROR_NO_MEMORY;
4438 dtd->scaffIndex = temp;
4441 else {
4442 groupConnector = (char *)MALLOC(groupSize = 32);
4443 if (!groupConnector)
4444 return XML_ERROR_NO_MEMORY;
4447 groupConnector[prologState.level] = 0;
4448 if (dtd->in_eldecl) {
4449 int myindex = nextScaffoldPart(parser);
4450 if (myindex < 0)
4451 return XML_ERROR_NO_MEMORY;
4452 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4453 dtd->scaffLevel++;
4454 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4455 if (elementDeclHandler)
4456 handleDefault = XML_FALSE;
4458 break;
4459 case XML_ROLE_GROUP_SEQUENCE:
4460 if (groupConnector[prologState.level] == ASCII_PIPE)
4461 return XML_ERROR_SYNTAX;
4462 groupConnector[prologState.level] = ASCII_COMMA;
4463 if (dtd->in_eldecl && elementDeclHandler)
4464 handleDefault = XML_FALSE;
4465 break;
4466 case XML_ROLE_GROUP_CHOICE:
4467 if (groupConnector[prologState.level] == ASCII_COMMA)
4468 return XML_ERROR_SYNTAX;
4469 if (dtd->in_eldecl
4470 && !groupConnector[prologState.level]
4471 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4472 != XML_CTYPE_MIXED)
4474 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4475 = XML_CTYPE_CHOICE;
4476 if (elementDeclHandler)
4477 handleDefault = XML_FALSE;
4479 groupConnector[prologState.level] = ASCII_PIPE;
4480 break;
4481 case XML_ROLE_PARAM_ENTITY_REF:
4482 #ifdef XML_DTD
4483 case XML_ROLE_INNER_PARAM_ENTITY_REF:
4484 dtd->hasParamEntityRefs = XML_TRUE;
4485 if (!paramEntityParsing)
4486 dtd->keepProcessing = dtd->standalone;
4487 else {
4488 const XML_Char *name;
4489 ENTITY *entity;
4490 name = poolStoreString(&dtd->pool, enc,
4491 s + enc->minBytesPerChar,
4492 next - enc->minBytesPerChar);
4493 if (!name)
4494 return XML_ERROR_NO_MEMORY;
4495 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
4496 poolDiscard(&dtd->pool);
4497 /* first, determine if a check for an existing declaration is needed;
4498 if yes, check that the entity exists, and that it is internal,
4499 otherwise call the skipped entity handler
4501 if (prologState.documentEntity &&
4502 (dtd->standalone
4503 ? !openInternalEntities
4504 : !dtd->hasParamEntityRefs)) {
4505 if (!entity)
4506 return XML_ERROR_UNDEFINED_ENTITY;
4507 else if (!entity->is_internal)
4508 return XML_ERROR_ENTITY_DECLARED_IN_PE;
4510 else if (!entity) {
4511 dtd->keepProcessing = dtd->standalone;
4512 /* cannot report skipped entities in declarations */
4513 if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) {
4514 skippedEntityHandler(handlerArg, name, 1);
4515 handleDefault = XML_FALSE;
4517 break;
4519 if (entity->open)
4520 return XML_ERROR_RECURSIVE_ENTITY_REF;
4521 if (entity->textPtr) {
4522 enum XML_Error result;
4523 XML_Bool betweenDecl =
4524 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
4525 result = processInternalEntity(parser, entity, betweenDecl);
4526 if (result != XML_ERROR_NONE)
4527 return result;
4528 handleDefault = XML_FALSE;
4529 break;
4531 if (externalEntityRefHandler) {
4532 dtd->paramEntityRead = XML_FALSE;
4533 entity->open = XML_TRUE;
4534 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4536 entity->base,
4537 entity->systemId,
4538 entity->publicId)) {
4539 entity->open = XML_FALSE;
4540 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4542 entity->open = XML_FALSE;
4543 handleDefault = XML_FALSE;
4544 if (!dtd->paramEntityRead) {
4545 dtd->keepProcessing = dtd->standalone;
4546 break;
4549 else {
4550 dtd->keepProcessing = dtd->standalone;
4551 break;
4554 #endif /* XML_DTD */
4555 if (!dtd->standalone &&
4556 notStandaloneHandler &&
4557 !notStandaloneHandler(handlerArg))
4558 return XML_ERROR_NOT_STANDALONE;
4559 break;
4561 /* Element declaration stuff */
4563 case XML_ROLE_ELEMENT_NAME:
4564 if (elementDeclHandler) {
4565 declElementType = getElementType(parser, enc, s, next);
4566 if (!declElementType)
4567 return XML_ERROR_NO_MEMORY;
4568 dtd->scaffLevel = 0;
4569 dtd->scaffCount = 0;
4570 dtd->in_eldecl = XML_TRUE;
4571 handleDefault = XML_FALSE;
4573 break;
4575 case XML_ROLE_CONTENT_ANY:
4576 case XML_ROLE_CONTENT_EMPTY:
4577 if (dtd->in_eldecl) {
4578 if (elementDeclHandler) {
4579 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
4580 if (!content)
4581 return XML_ERROR_NO_MEMORY;
4582 content->quant = XML_CQUANT_NONE;
4583 content->name = NULL;
4584 content->numchildren = 0;
4585 content->children = NULL;
4586 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
4587 XML_CTYPE_ANY :
4588 XML_CTYPE_EMPTY);
4589 *eventEndPP = s;
4590 elementDeclHandler(handlerArg, declElementType->name, content);
4591 handleDefault = XML_FALSE;
4593 dtd->in_eldecl = XML_FALSE;
4595 break;
4597 case XML_ROLE_CONTENT_PCDATA:
4598 if (dtd->in_eldecl) {
4599 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4600 = XML_CTYPE_MIXED;
4601 if (elementDeclHandler)
4602 handleDefault = XML_FALSE;
4604 break;
4606 case XML_ROLE_CONTENT_ELEMENT:
4607 quant = XML_CQUANT_NONE;
4608 goto elementContent;
4609 case XML_ROLE_CONTENT_ELEMENT_OPT:
4610 quant = XML_CQUANT_OPT;
4611 goto elementContent;
4612 case XML_ROLE_CONTENT_ELEMENT_REP:
4613 quant = XML_CQUANT_REP;
4614 goto elementContent;
4615 case XML_ROLE_CONTENT_ELEMENT_PLUS:
4616 quant = XML_CQUANT_PLUS;
4617 elementContent:
4618 if (dtd->in_eldecl) {
4619 ELEMENT_TYPE *el;
4620 const XML_Char *name;
4621 int nameLen;
4622 const char *nxt = (quant == XML_CQUANT_NONE
4623 ? next
4624 : next - enc->minBytesPerChar);
4625 int myindex = nextScaffoldPart(parser);
4626 if (myindex < 0)
4627 return XML_ERROR_NO_MEMORY;
4628 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
4629 dtd->scaffold[myindex].quant = quant;
4630 el = getElementType(parser, enc, s, nxt);
4631 if (!el)
4632 return XML_ERROR_NO_MEMORY;
4633 name = el->name;
4634 dtd->scaffold[myindex].name = name;
4635 nameLen = 0;
4636 for (; name[nameLen++]; );
4637 dtd->contentStringLen += nameLen;
4638 if (elementDeclHandler)
4639 handleDefault = XML_FALSE;
4641 break;
4643 case XML_ROLE_GROUP_CLOSE:
4644 quant = XML_CQUANT_NONE;
4645 goto closeGroup;
4646 case XML_ROLE_GROUP_CLOSE_OPT:
4647 quant = XML_CQUANT_OPT;
4648 goto closeGroup;
4649 case XML_ROLE_GROUP_CLOSE_REP:
4650 quant = XML_CQUANT_REP;
4651 goto closeGroup;
4652 case XML_ROLE_GROUP_CLOSE_PLUS:
4653 quant = XML_CQUANT_PLUS;
4654 closeGroup:
4655 if (dtd->in_eldecl) {
4656 if (elementDeclHandler)
4657 handleDefault = XML_FALSE;
4658 dtd->scaffLevel--;
4659 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
4660 if (dtd->scaffLevel == 0) {
4661 if (!handleDefault) {
4662 XML_Content *model = build_model(parser);
4663 if (!model)
4664 return XML_ERROR_NO_MEMORY;
4665 *eventEndPP = s;
4666 elementDeclHandler(handlerArg, declElementType->name, model);
4668 dtd->in_eldecl = XML_FALSE;
4669 dtd->contentStringLen = 0;
4672 break;
4673 /* End element declaration stuff */
4675 case XML_ROLE_PI:
4676 if (!reportProcessingInstruction(parser, enc, s, next))
4677 return XML_ERROR_NO_MEMORY;
4678 handleDefault = XML_FALSE;
4679 break;
4680 case XML_ROLE_COMMENT:
4681 if (!reportComment(parser, enc, s, next))
4682 return XML_ERROR_NO_MEMORY;
4683 handleDefault = XML_FALSE;
4684 break;
4685 case XML_ROLE_NONE:
4686 switch (tok) {
4687 case XML_TOK_BOM:
4688 handleDefault = XML_FALSE;
4689 break;
4691 break;
4692 case XML_ROLE_DOCTYPE_NONE:
4693 if (startDoctypeDeclHandler)
4694 handleDefault = XML_FALSE;
4695 break;
4696 case XML_ROLE_ENTITY_NONE:
4697 if (dtd->keepProcessing && entityDeclHandler)
4698 handleDefault = XML_FALSE;
4699 break;
4700 case XML_ROLE_NOTATION_NONE:
4701 if (notationDeclHandler)
4702 handleDefault = XML_FALSE;
4703 break;
4704 case XML_ROLE_ATTLIST_NONE:
4705 if (dtd->keepProcessing && attlistDeclHandler)
4706 handleDefault = XML_FALSE;
4707 break;
4708 case XML_ROLE_ELEMENT_NONE:
4709 if (elementDeclHandler)
4710 handleDefault = XML_FALSE;
4711 break;
4712 } /* end of big switch */
4714 if (handleDefault && defaultHandler)
4715 reportDefault(parser, enc, s, next);
4717 switch (ps_parsing) {
4718 case XML_SUSPENDED:
4719 *nextPtr = next;
4720 return XML_ERROR_NONE;
4721 case XML_FINISHED:
4722 return XML_ERROR_ABORTED;
4723 default:
4724 s = next;
4725 tok = XmlPrologTok(enc, s, end, &next);
4728 /* not reached */
4731 static enum XML_Error PTRCALL
4732 epilogProcessor(XML_Parser parser,
4733 const char *s,
4734 const char *end,
4735 const char **nextPtr)
4737 processor = epilogProcessor;
4738 eventPtr = s;
4739 for (;;) {
4740 const char *next = NULL;
4741 int tok = XmlPrologTok(encoding, s, end, &next);
4742 eventEndPtr = next;
4743 switch (tok) {
4744 /* report partial linebreak - it might be the last token */
4745 case -XML_TOK_PROLOG_S:
4746 if (defaultHandler) {
4747 reportDefault(parser, encoding, s, next);
4748 if (ps_parsing == XML_FINISHED)
4749 return XML_ERROR_ABORTED;
4751 *nextPtr = next;
4752 return XML_ERROR_NONE;
4753 case XML_TOK_NONE:
4754 *nextPtr = s;
4755 return XML_ERROR_NONE;
4756 case XML_TOK_PROLOG_S:
4757 if (defaultHandler)
4758 reportDefault(parser, encoding, s, next);
4759 break;
4760 case XML_TOK_PI:
4761 if (!reportProcessingInstruction(parser, encoding, s, next))
4762 return XML_ERROR_NO_MEMORY;
4763 break;
4764 case XML_TOK_COMMENT:
4765 if (!reportComment(parser, encoding, s, next))
4766 return XML_ERROR_NO_MEMORY;
4767 break;
4768 case XML_TOK_INVALID:
4769 eventPtr = next;
4770 return XML_ERROR_INVALID_TOKEN;
4771 case XML_TOK_PARTIAL:
4772 if (!ps_finalBuffer) {
4773 *nextPtr = s;
4774 return XML_ERROR_NONE;
4776 return XML_ERROR_UNCLOSED_TOKEN;
4777 case XML_TOK_PARTIAL_CHAR:
4778 if (!ps_finalBuffer) {
4779 *nextPtr = s;
4780 return XML_ERROR_NONE;
4782 return XML_ERROR_PARTIAL_CHAR;
4783 default:
4784 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
4786 eventPtr = s = next;
4787 switch (ps_parsing) {
4788 case XML_SUSPENDED:
4789 *nextPtr = next;
4790 return XML_ERROR_NONE;
4791 case XML_FINISHED:
4792 return XML_ERROR_ABORTED;
4793 default: ;
4798 static enum XML_Error
4799 processInternalEntity(XML_Parser parser, ENTITY *entity,
4800 XML_Bool betweenDecl)
4802 const char *textStart, *textEnd;
4803 const char *next;
4804 enum XML_Error result;
4805 OPEN_INTERNAL_ENTITY *openEntity;
4807 if (freeInternalEntities) {
4808 openEntity = freeInternalEntities;
4809 freeInternalEntities = openEntity->next;
4811 else {
4812 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY));
4813 if (!openEntity)
4814 return XML_ERROR_NO_MEMORY;
4816 entity->open = XML_TRUE;
4817 entity->processed = 0;
4818 openEntity->next = openInternalEntities;
4819 openInternalEntities = openEntity;
4820 openEntity->entity = entity;
4821 openEntity->startTagLevel = tagLevel;
4822 openEntity->betweenDecl = betweenDecl;
4823 openEntity->internalEventPtr = NULL;
4824 openEntity->internalEventEndPtr = NULL;
4825 textStart = (char *)entity->textPtr;
4826 textEnd = (char *)(entity->textPtr + entity->textLen);
4828 #ifdef XML_DTD
4829 if (entity->is_param) {
4830 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
4831 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
4832 next, &next, XML_FALSE);
4834 else
4835 #endif /* XML_DTD */
4836 result = doContent(parser, tagLevel, internalEncoding, textStart,
4837 textEnd, &next, XML_FALSE);
4839 if (result == XML_ERROR_NONE) {
4840 if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4841 entity->processed = (int)(next - textStart);
4842 processor = internalEntityProcessor;
4844 else {
4845 entity->open = XML_FALSE;
4846 openInternalEntities = openEntity->next;
4847 /* put openEntity back in list of free instances */
4848 openEntity->next = freeInternalEntities;
4849 freeInternalEntities = openEntity;
4852 return result;
4855 static enum XML_Error PTRCALL
4856 internalEntityProcessor(XML_Parser parser,
4857 const char *s,
4858 const char *end,
4859 const char **nextPtr)
4861 ENTITY *entity;
4862 const char *textStart, *textEnd;
4863 const char *next;
4864 enum XML_Error result;
4865 OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities;
4866 if (!openEntity)
4867 return XML_ERROR_UNEXPECTED_STATE;
4869 entity = openEntity->entity;
4870 textStart = ((char *)entity->textPtr) + entity->processed;
4871 textEnd = (char *)(entity->textPtr + entity->textLen);
4873 #ifdef XML_DTD
4874 if (entity->is_param) {
4875 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
4876 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
4877 next, &next, XML_FALSE);
4879 else
4880 #endif /* XML_DTD */
4881 result = doContent(parser, openEntity->startTagLevel, internalEncoding,
4882 textStart, textEnd, &next, XML_FALSE);
4884 if (result != XML_ERROR_NONE)
4885 return result;
4886 else if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4887 entity->processed = (int)(next - (char *)entity->textPtr);
4888 return result;
4890 else {
4891 entity->open = XML_FALSE;
4892 openInternalEntities = openEntity->next;
4893 /* put openEntity back in list of free instances */
4894 openEntity->next = freeInternalEntities;
4895 freeInternalEntities = openEntity;
4898 #ifdef XML_DTD
4899 if (entity->is_param) {
4900 int tok;
4901 processor = prologProcessor;
4902 tok = XmlPrologTok(encoding, s, end, &next);
4903 return doProlog(parser, encoding, s, end, tok, next, nextPtr,
4904 (XML_Bool)!ps_finalBuffer);
4906 else
4907 #endif /* XML_DTD */
4909 processor = contentProcessor;
4910 /* see externalEntityContentProcessor vs contentProcessor */
4911 return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
4912 nextPtr, (XML_Bool)!ps_finalBuffer);
4916 static enum XML_Error PTRCALL
4917 errorProcessor(XML_Parser parser,
4918 const char *s,
4919 const char *end,
4920 const char **nextPtr)
4922 return errorCode;
4925 static enum XML_Error
4926 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4927 const char *ptr, const char *end,
4928 STRING_POOL *pool)
4930 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
4931 end, pool);
4932 if (result)
4933 return result;
4934 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
4935 poolChop(pool);
4936 if (!poolAppendChar(pool, XML_T('\0')))
4937 return XML_ERROR_NO_MEMORY;
4938 return XML_ERROR_NONE;
4941 static enum XML_Error
4942 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4943 const char *ptr, const char *end,
4944 STRING_POOL *pool)
4946 DTD * const dtd = _dtd; /* save one level of indirection */
4947 for (;;) {
4948 const char *next;
4949 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
4950 switch (tok) {
4951 case XML_TOK_NONE:
4952 return XML_ERROR_NONE;
4953 case XML_TOK_INVALID:
4954 if (enc == encoding)
4955 eventPtr = next;
4956 return XML_ERROR_INVALID_TOKEN;
4957 case XML_TOK_PARTIAL:
4958 if (enc == encoding)
4959 eventPtr = ptr;
4960 return XML_ERROR_INVALID_TOKEN;
4961 case XML_TOK_CHAR_REF:
4963 XML_Char buf[XML_ENCODE_MAX];
4964 int i;
4965 int n = XmlCharRefNumber(enc, ptr);
4966 if (n < 0) {
4967 if (enc == encoding)
4968 eventPtr = ptr;
4969 return XML_ERROR_BAD_CHAR_REF;
4971 if (!isCdata
4972 && n == 0x20 /* space */
4973 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
4974 break;
4975 n = XmlEncode(n, (ICHAR *)buf);
4976 if (!n) {
4977 if (enc == encoding)
4978 eventPtr = ptr;
4979 return XML_ERROR_BAD_CHAR_REF;
4981 for (i = 0; i < n; i++) {
4982 if (!poolAppendChar(pool, buf[i]))
4983 return XML_ERROR_NO_MEMORY;
4986 break;
4987 case XML_TOK_DATA_CHARS:
4988 if (!poolAppend(pool, enc, ptr, next))
4989 return XML_ERROR_NO_MEMORY;
4990 break;
4991 case XML_TOK_TRAILING_CR:
4992 next = ptr + enc->minBytesPerChar;
4993 /* fall through */
4994 case XML_TOK_ATTRIBUTE_VALUE_S:
4995 case XML_TOK_DATA_NEWLINE:
4996 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
4997 break;
4998 if (!poolAppendChar(pool, 0x20))
4999 return XML_ERROR_NO_MEMORY;
5000 break;
5001 case XML_TOK_ENTITY_REF:
5003 const XML_Char *name;
5004 ENTITY *entity;
5005 char checkEntityDecl;
5006 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5007 ptr + enc->minBytesPerChar,
5008 next - enc->minBytesPerChar);
5009 if (ch) {
5010 if (!poolAppendChar(pool, ch))
5011 return XML_ERROR_NO_MEMORY;
5012 break;
5014 name = poolStoreString(&temp2Pool, enc,
5015 ptr + enc->minBytesPerChar,
5016 next - enc->minBytesPerChar);
5017 if (!name)
5018 return XML_ERROR_NO_MEMORY;
5019 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5020 poolDiscard(&temp2Pool);
5021 /* First, determine if a check for an existing declaration is needed;
5022 if yes, check that the entity exists, and that it is internal.
5024 if (pool == &dtd->pool) /* are we called from prolog? */
5025 checkEntityDecl =
5026 #ifdef XML_DTD
5027 prologState.documentEntity &&
5028 #endif /* XML_DTD */
5029 (dtd->standalone
5030 ? !openInternalEntities
5031 : !dtd->hasParamEntityRefs);
5032 else /* if (pool == &tempPool): we are called from content */
5033 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5034 if (checkEntityDecl) {
5035 if (!entity)
5036 return XML_ERROR_UNDEFINED_ENTITY;
5037 else if (!entity->is_internal)
5038 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5040 else if (!entity) {
5041 /* Cannot report skipped entity here - see comments on
5042 skippedEntityHandler.
5043 if (skippedEntityHandler)
5044 skippedEntityHandler(handlerArg, name, 0);
5046 /* Cannot call the default handler because this would be
5047 out of sync with the call to the startElementHandler.
5048 if ((pool == &tempPool) && defaultHandler)
5049 reportDefault(parser, enc, ptr, next);
5051 break;
5053 if (entity->open) {
5054 if (enc == encoding)
5055 eventPtr = ptr;
5056 return XML_ERROR_RECURSIVE_ENTITY_REF;
5058 if (entity->notation) {
5059 if (enc == encoding)
5060 eventPtr = ptr;
5061 return XML_ERROR_BINARY_ENTITY_REF;
5063 if (!entity->textPtr) {
5064 if (enc == encoding)
5065 eventPtr = ptr;
5066 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5068 else {
5069 enum XML_Error result;
5070 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5071 entity->open = XML_TRUE;
5072 result = appendAttributeValue(parser, internalEncoding, isCdata,
5073 (char *)entity->textPtr,
5074 (char *)textEnd, pool);
5075 entity->open = XML_FALSE;
5076 if (result)
5077 return result;
5080 break;
5081 default:
5082 if (enc == encoding)
5083 eventPtr = ptr;
5084 return XML_ERROR_UNEXPECTED_STATE;
5086 ptr = next;
5088 /* not reached */
5091 static enum XML_Error
5092 storeEntityValue(XML_Parser parser,
5093 const ENCODING *enc,
5094 const char *entityTextPtr,
5095 const char *entityTextEnd)
5097 DTD * const dtd = _dtd; /* save one level of indirection */
5098 STRING_POOL *pool = &(dtd->entityValuePool);
5099 enum XML_Error result = XML_ERROR_NONE;
5100 #ifdef XML_DTD
5101 int oldInEntityValue = prologState.inEntityValue;
5102 prologState.inEntityValue = 1;
5103 #endif /* XML_DTD */
5104 /* never return Null for the value argument in EntityDeclHandler,
5105 since this would indicate an external entity; therefore we
5106 have to make sure that entityValuePool.start is not null */
5107 if (!pool->blocks) {
5108 if (!poolGrow(pool))
5109 return XML_ERROR_NO_MEMORY;
5112 for (;;) {
5113 const char *next;
5114 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5115 switch (tok) {
5116 case XML_TOK_PARAM_ENTITY_REF:
5117 #ifdef XML_DTD
5118 if (isParamEntity || enc != encoding) {
5119 const XML_Char *name;
5120 ENTITY *entity;
5121 name = poolStoreString(&tempPool, enc,
5122 entityTextPtr + enc->minBytesPerChar,
5123 next - enc->minBytesPerChar);
5124 if (!name) {
5125 result = XML_ERROR_NO_MEMORY;
5126 goto endEntityValue;
5128 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5129 poolDiscard(&tempPool);
5130 if (!entity) {
5131 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5132 /* cannot report skipped entity here - see comments on
5133 skippedEntityHandler
5134 if (skippedEntityHandler)
5135 skippedEntityHandler(handlerArg, name, 0);
5137 dtd->keepProcessing = dtd->standalone;
5138 goto endEntityValue;
5140 if (entity->open) {
5141 if (enc == encoding)
5142 eventPtr = entityTextPtr;
5143 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5144 goto endEntityValue;
5146 if (entity->systemId) {
5147 if (externalEntityRefHandler) {
5148 dtd->paramEntityRead = XML_FALSE;
5149 entity->open = XML_TRUE;
5150 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5152 entity->base,
5153 entity->systemId,
5154 entity->publicId)) {
5155 entity->open = XML_FALSE;
5156 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5157 goto endEntityValue;
5159 entity->open = XML_FALSE;
5160 if (!dtd->paramEntityRead)
5161 dtd->keepProcessing = dtd->standalone;
5163 else
5164 dtd->keepProcessing = dtd->standalone;
5166 else {
5167 entity->open = XML_TRUE;
5168 result = storeEntityValue(parser,
5169 internalEncoding,
5170 (char *)entity->textPtr,
5171 (char *)(entity->textPtr
5172 + entity->textLen));
5173 entity->open = XML_FALSE;
5174 if (result)
5175 goto endEntityValue;
5177 break;
5179 #endif /* XML_DTD */
5180 /* In the internal subset, PE references are not legal
5181 within markup declarations, e.g entity values in this case. */
5182 eventPtr = entityTextPtr;
5183 result = XML_ERROR_PARAM_ENTITY_REF;
5184 goto endEntityValue;
5185 case XML_TOK_NONE:
5186 result = XML_ERROR_NONE;
5187 goto endEntityValue;
5188 case XML_TOK_ENTITY_REF:
5189 case XML_TOK_DATA_CHARS:
5190 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5191 result = XML_ERROR_NO_MEMORY;
5192 goto endEntityValue;
5194 break;
5195 case XML_TOK_TRAILING_CR:
5196 next = entityTextPtr + enc->minBytesPerChar;
5197 /* fall through */
5198 case XML_TOK_DATA_NEWLINE:
5199 if (pool->end == pool->ptr && !poolGrow(pool)) {
5200 result = XML_ERROR_NO_MEMORY;
5201 goto endEntityValue;
5203 *(pool->ptr)++ = 0xA;
5204 break;
5205 case XML_TOK_CHAR_REF:
5207 XML_Char buf[XML_ENCODE_MAX];
5208 int i;
5209 int n = XmlCharRefNumber(enc, entityTextPtr);
5210 if (n < 0) {
5211 if (enc == encoding)
5212 eventPtr = entityTextPtr;
5213 result = XML_ERROR_BAD_CHAR_REF;
5214 goto endEntityValue;
5216 n = XmlEncode(n, (ICHAR *)buf);
5217 if (!n) {
5218 if (enc == encoding)
5219 eventPtr = entityTextPtr;
5220 result = XML_ERROR_BAD_CHAR_REF;
5221 goto endEntityValue;
5223 for (i = 0; i < n; i++) {
5224 if (pool->end == pool->ptr && !poolGrow(pool)) {
5225 result = XML_ERROR_NO_MEMORY;
5226 goto endEntityValue;
5228 *(pool->ptr)++ = buf[i];
5231 break;
5232 case XML_TOK_PARTIAL:
5233 if (enc == encoding)
5234 eventPtr = entityTextPtr;
5235 result = XML_ERROR_INVALID_TOKEN;
5236 goto endEntityValue;
5237 case XML_TOK_INVALID:
5238 if (enc == encoding)
5239 eventPtr = next;
5240 result = XML_ERROR_INVALID_TOKEN;
5241 goto endEntityValue;
5242 default:
5243 if (enc == encoding)
5244 eventPtr = entityTextPtr;
5245 result = XML_ERROR_UNEXPECTED_STATE;
5246 goto endEntityValue;
5248 entityTextPtr = next;
5250 endEntityValue:
5251 #ifdef XML_DTD
5252 prologState.inEntityValue = oldInEntityValue;
5253 #endif /* XML_DTD */
5254 return result;
5257 static void FASTCALL
5258 normalizeLines(XML_Char *s)
5260 XML_Char *p;
5261 for (;; s++) {
5262 if (*s == XML_T('\0'))
5263 return;
5264 if (*s == 0xD)
5265 break;
5267 p = s;
5268 do {
5269 if (*s == 0xD) {
5270 *p++ = 0xA;
5271 if (*++s == 0xA)
5272 s++;
5274 else
5275 *p++ = *s++;
5276 } while (*s);
5277 *p = XML_T('\0');
5280 static int
5281 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5282 const char *start, const char *end)
5284 const XML_Char *target;
5285 XML_Char *data;
5286 const char *tem;
5287 if (!processingInstructionHandler) {
5288 if (defaultHandler)
5289 reportDefault(parser, enc, start, end);
5290 return 1;
5292 start += enc->minBytesPerChar * 2;
5293 tem = start + XmlNameLength(enc, start);
5294 target = poolStoreString(&tempPool, enc, start, tem);
5295 if (!target)
5296 return 0;
5297 poolFinish(&tempPool);
5298 data = poolStoreString(&tempPool, enc,
5299 XmlSkipS(enc, tem),
5300 end - enc->minBytesPerChar*2);
5301 if (!data)
5302 return 0;
5303 normalizeLines(data);
5304 processingInstructionHandler(handlerArg, target, data);
5305 poolClear(&tempPool);
5306 return 1;
5309 static int
5310 reportComment(XML_Parser parser, const ENCODING *enc,
5311 const char *start, const char *end)
5313 XML_Char *data;
5314 if (!commentHandler) {
5315 if (defaultHandler)
5316 reportDefault(parser, enc, start, end);
5317 return 1;
5319 data = poolStoreString(&tempPool,
5320 enc,
5321 start + enc->minBytesPerChar * 4,
5322 end - enc->minBytesPerChar * 3);
5323 if (!data)
5324 return 0;
5325 normalizeLines(data);
5326 commentHandler(handlerArg, data);
5327 poolClear(&tempPool);
5328 return 1;
5331 static void
5332 reportDefault(XML_Parser parser, const ENCODING *enc,
5333 const char *s, const char *end)
5335 if (MUST_CONVERT(enc, s)) {
5336 const char **eventPP;
5337 const char **eventEndPP;
5338 if (enc == encoding) {
5339 eventPP = &eventPtr;
5340 eventEndPP = &eventEndPtr;
5342 else {
5343 eventPP = &(openInternalEntities->internalEventPtr);
5344 eventEndPP = &(openInternalEntities->internalEventEndPtr);
5346 do {
5347 ICHAR *dataPtr = (ICHAR *)dataBuf;
5348 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
5349 *eventEndPP = s;
5350 defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
5351 *eventPP = s;
5352 } while (s != end);
5354 else
5355 defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
5359 static int
5360 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5361 XML_Bool isId, const XML_Char *value, XML_Parser parser)
5363 DEFAULT_ATTRIBUTE *att;
5364 if (value || isId) {
5365 /* The handling of default attributes gets messed up if we have
5366 a default which duplicates a non-default. */
5367 int i;
5368 for (i = 0; i < type->nDefaultAtts; i++)
5369 if (attId == type->defaultAtts[i].id)
5370 return 1;
5371 if (isId && !type->idAtt && !attId->xmlns)
5372 type->idAtt = attId;
5374 if (type->nDefaultAtts == type->allocDefaultAtts) {
5375 if (type->allocDefaultAtts == 0) {
5376 type->allocDefaultAtts = 8;
5377 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts
5378 * sizeof(DEFAULT_ATTRIBUTE));
5379 if (!type->defaultAtts)
5380 return 0;
5382 else {
5383 DEFAULT_ATTRIBUTE *temp;
5384 int count = type->allocDefaultAtts * 2;
5385 temp = (DEFAULT_ATTRIBUTE *)
5386 REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
5387 if (temp == NULL)
5388 return 0;
5389 type->allocDefaultAtts = count;
5390 type->defaultAtts = temp;
5393 att = type->defaultAtts + type->nDefaultAtts;
5394 att->id = attId;
5395 att->value = value;
5396 att->isCdata = isCdata;
5397 if (!isCdata)
5398 attId->maybeTokenized = XML_TRUE;
5399 type->nDefaultAtts += 1;
5400 return 1;
5403 static int
5404 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
5406 DTD * const dtd = _dtd; /* save one level of indirection */
5407 const XML_Char *name;
5408 for (name = elementType->name; *name; name++) {
5409 if (*name == XML_T(ASCII_COLON)) {
5410 PREFIX *prefix;
5411 const XML_Char *s;
5412 for (s = elementType->name; s != name; s++) {
5413 if (!poolAppendChar(&dtd->pool, *s))
5414 return 0;
5416 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5417 return 0;
5418 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
5419 sizeof(PREFIX));
5420 if (!prefix)
5421 return 0;
5422 if (prefix->name == poolStart(&dtd->pool))
5423 poolFinish(&dtd->pool);
5424 else
5425 poolDiscard(&dtd->pool);
5426 elementType->prefix = prefix;
5430 return 1;
5433 static ATTRIBUTE_ID *
5434 getAttributeId(XML_Parser parser, const ENCODING *enc,
5435 const char *start, const char *end)
5437 DTD * const dtd = _dtd; /* save one level of indirection */
5438 ATTRIBUTE_ID *id;
5439 const XML_Char *name;
5440 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5441 return NULL;
5442 name = poolStoreString(&dtd->pool, enc, start, end);
5443 if (!name)
5444 return NULL;
5445 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
5446 ++name;
5447 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
5448 if (!id)
5449 return NULL;
5450 if (id->name != name)
5451 poolDiscard(&dtd->pool);
5452 else {
5453 poolFinish(&dtd->pool);
5454 if (!ns)
5456 else if (name[0] == XML_T(ASCII_x)
5457 && name[1] == XML_T(ASCII_m)
5458 && name[2] == XML_T(ASCII_l)
5459 && name[3] == XML_T(ASCII_n)
5460 && name[4] == XML_T(ASCII_s)
5461 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
5462 if (name[5] == XML_T('\0'))
5463 id->prefix = &dtd->defaultPrefix;
5464 else
5465 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
5466 id->xmlns = XML_TRUE;
5468 else {
5469 int i;
5470 for (i = 0; name[i]; i++) {
5471 /* attributes without prefix are *not* in the default namespace */
5472 if (name[i] == XML_T(ASCII_COLON)) {
5473 int j;
5474 for (j = 0; j < i; j++) {
5475 if (!poolAppendChar(&dtd->pool, name[j]))
5476 return NULL;
5478 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5479 return NULL;
5480 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
5481 sizeof(PREFIX));
5482 if (id->prefix->name == poolStart(&dtd->pool))
5483 poolFinish(&dtd->pool);
5484 else
5485 poolDiscard(&dtd->pool);
5486 break;
5491 return id;
5494 #define CONTEXT_SEP XML_T(ASCII_FF)
5496 static const XML_Char *
5497 getContext(XML_Parser parser)
5499 DTD * const dtd = _dtd; /* save one level of indirection */
5500 HASH_TABLE_ITER iter;
5501 XML_Bool needSep = XML_FALSE;
5503 if (dtd->defaultPrefix.binding) {
5504 int i;
5505 int len;
5506 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
5507 return NULL;
5508 len = dtd->defaultPrefix.binding->uriLen;
5509 if (namespaceSeparator)
5510 len--;
5511 for (i = 0; i < len; i++)
5512 if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i]))
5513 return NULL;
5514 needSep = XML_TRUE;
5517 hashTableIterInit(&iter, &(dtd->prefixes));
5518 for (;;) {
5519 int i;
5520 int len;
5521 const XML_Char *s;
5522 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
5523 if (!prefix)
5524 break;
5525 if (!prefix->binding)
5526 continue;
5527 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
5528 return NULL;
5529 for (s = prefix->name; *s; s++)
5530 if (!poolAppendChar(&tempPool, *s))
5531 return NULL;
5532 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
5533 return NULL;
5534 len = prefix->binding->uriLen;
5535 if (namespaceSeparator)
5536 len--;
5537 for (i = 0; i < len; i++)
5538 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
5539 return NULL;
5540 needSep = XML_TRUE;
5544 hashTableIterInit(&iter, &(dtd->generalEntities));
5545 for (;;) {
5546 const XML_Char *s;
5547 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
5548 if (!e)
5549 break;
5550 if (!e->open)
5551 continue;
5552 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
5553 return NULL;
5554 for (s = e->name; *s; s++)
5555 if (!poolAppendChar(&tempPool, *s))
5556 return 0;
5557 needSep = XML_TRUE;
5560 if (!poolAppendChar(&tempPool, XML_T('\0')))
5561 return NULL;
5562 return tempPool.start;
5565 static XML_Bool
5566 setContext(XML_Parser parser, const XML_Char *context)
5568 DTD * const dtd = _dtd; /* save one level of indirection */
5569 const XML_Char *s = context;
5571 while (*context != XML_T('\0')) {
5572 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
5573 ENTITY *e;
5574 if (!poolAppendChar(&tempPool, XML_T('\0')))
5575 return XML_FALSE;
5576 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0);
5577 if (e)
5578 e->open = XML_TRUE;
5579 if (*s != XML_T('\0'))
5580 s++;
5581 context = s;
5582 poolDiscard(&tempPool);
5584 else if (*s == XML_T(ASCII_EQUALS)) {
5585 PREFIX *prefix;
5586 if (poolLength(&tempPool) == 0)
5587 prefix = &dtd->defaultPrefix;
5588 else {
5589 if (!poolAppendChar(&tempPool, XML_T('\0')))
5590 return XML_FALSE;
5591 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool),
5592 sizeof(PREFIX));
5593 if (!prefix)
5594 return XML_FALSE;
5595 if (prefix->name == poolStart(&tempPool)) {
5596 prefix->name = poolCopyString(&dtd->pool, prefix->name);
5597 if (!prefix->name)
5598 return XML_FALSE;
5600 poolDiscard(&tempPool);
5602 for (context = s + 1;
5603 *context != CONTEXT_SEP && *context != XML_T('\0');
5604 context++)
5605 if (!poolAppendChar(&tempPool, *context))
5606 return XML_FALSE;
5607 if (!poolAppendChar(&tempPool, XML_T('\0')))
5608 return XML_FALSE;
5609 if (addBinding(parser, prefix, NULL, poolStart(&tempPool),
5610 &inheritedBindings) != XML_ERROR_NONE)
5611 return XML_FALSE;
5612 poolDiscard(&tempPool);
5613 if (*context != XML_T('\0'))
5614 ++context;
5615 s = context;
5617 else {
5618 if (!poolAppendChar(&tempPool, *s))
5619 return XML_FALSE;
5620 s++;
5623 return XML_TRUE;
5626 static void FASTCALL
5627 normalizePublicId(XML_Char *publicId)
5629 XML_Char *p = publicId;
5630 XML_Char *s;
5631 for (s = publicId; *s; s++) {
5632 switch (*s) {
5633 case 0x20:
5634 case 0xD:
5635 case 0xA:
5636 if (p != publicId && p[-1] != 0x20)
5637 *p++ = 0x20;
5638 break;
5639 default:
5640 *p++ = *s;
5643 if (p != publicId && p[-1] == 0x20)
5644 --p;
5645 *p = XML_T('\0');
5648 static DTD *
5649 dtdCreate(const XML_Memory_Handling_Suite *ms)
5651 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
5652 if (p == NULL)
5653 return p;
5654 poolInit(&(p->pool), ms);
5655 poolInit(&(p->entityValuePool), ms);
5656 hashTableInit(&(p->generalEntities), ms);
5657 hashTableInit(&(p->elementTypes), ms);
5658 hashTableInit(&(p->attributeIds), ms);
5659 hashTableInit(&(p->prefixes), ms);
5660 #ifdef XML_DTD
5661 p->paramEntityRead = XML_FALSE;
5662 hashTableInit(&(p->paramEntities), ms);
5663 #endif /* XML_DTD */
5664 p->defaultPrefix.name = NULL;
5665 p->defaultPrefix.binding = NULL;
5667 p->in_eldecl = XML_FALSE;
5668 p->scaffIndex = NULL;
5669 p->scaffold = NULL;
5670 p->scaffLevel = 0;
5671 p->scaffSize = 0;
5672 p->scaffCount = 0;
5673 p->contentStringLen = 0;
5675 p->keepProcessing = XML_TRUE;
5676 p->hasParamEntityRefs = XML_FALSE;
5677 p->standalone = XML_FALSE;
5678 return p;
5681 static void
5682 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
5684 HASH_TABLE_ITER iter;
5685 hashTableIterInit(&iter, &(p->elementTypes));
5686 for (;;) {
5687 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5688 if (!e)
5689 break;
5690 if (e->allocDefaultAtts != 0)
5691 ms->free_fcn(e->defaultAtts);
5693 hashTableClear(&(p->generalEntities));
5694 #ifdef XML_DTD
5695 p->paramEntityRead = XML_FALSE;
5696 hashTableClear(&(p->paramEntities));
5697 #endif /* XML_DTD */
5698 hashTableClear(&(p->elementTypes));
5699 hashTableClear(&(p->attributeIds));
5700 hashTableClear(&(p->prefixes));
5701 poolClear(&(p->pool));
5702 poolClear(&(p->entityValuePool));
5703 p->defaultPrefix.name = NULL;
5704 p->defaultPrefix.binding = NULL;
5706 p->in_eldecl = XML_FALSE;
5708 ms->free_fcn(p->scaffIndex);
5709 p->scaffIndex = NULL;
5710 ms->free_fcn(p->scaffold);
5711 p->scaffold = NULL;
5713 p->scaffLevel = 0;
5714 p->scaffSize = 0;
5715 p->scaffCount = 0;
5716 p->contentStringLen = 0;
5718 p->keepProcessing = XML_TRUE;
5719 p->hasParamEntityRefs = XML_FALSE;
5720 p->standalone = XML_FALSE;
5723 static void
5724 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
5726 HASH_TABLE_ITER iter;
5727 hashTableIterInit(&iter, &(p->elementTypes));
5728 for (;;) {
5729 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5730 if (!e)
5731 break;
5732 if (e->allocDefaultAtts != 0)
5733 ms->free_fcn(e->defaultAtts);
5735 hashTableDestroy(&(p->generalEntities));
5736 #ifdef XML_DTD
5737 hashTableDestroy(&(p->paramEntities));
5738 #endif /* XML_DTD */
5739 hashTableDestroy(&(p->elementTypes));
5740 hashTableDestroy(&(p->attributeIds));
5741 hashTableDestroy(&(p->prefixes));
5742 poolDestroy(&(p->pool));
5743 poolDestroy(&(p->entityValuePool));
5744 if (isDocEntity) {
5745 ms->free_fcn(p->scaffIndex);
5746 ms->free_fcn(p->scaffold);
5748 ms->free_fcn(p);
5751 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
5752 The new DTD has already been initialized.
5754 static int
5755 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
5757 HASH_TABLE_ITER iter;
5759 /* Copy the prefix table. */
5761 hashTableIterInit(&iter, &(oldDtd->prefixes));
5762 for (;;) {
5763 const XML_Char *name;
5764 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
5765 if (!oldP)
5766 break;
5767 name = poolCopyString(&(newDtd->pool), oldP->name);
5768 if (!name)
5769 return 0;
5770 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
5771 return 0;
5774 hashTableIterInit(&iter, &(oldDtd->attributeIds));
5776 /* Copy the attribute id table. */
5778 for (;;) {
5779 ATTRIBUTE_ID *newA;
5780 const XML_Char *name;
5781 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
5783 if (!oldA)
5784 break;
5785 /* Remember to allocate the scratch byte before the name. */
5786 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
5787 return 0;
5788 name = poolCopyString(&(newDtd->pool), oldA->name);
5789 if (!name)
5790 return 0;
5791 ++name;
5792 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
5793 sizeof(ATTRIBUTE_ID));
5794 if (!newA)
5795 return 0;
5796 newA->maybeTokenized = oldA->maybeTokenized;
5797 if (oldA->prefix) {
5798 newA->xmlns = oldA->xmlns;
5799 if (oldA->prefix == &oldDtd->defaultPrefix)
5800 newA->prefix = &newDtd->defaultPrefix;
5801 else
5802 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
5803 oldA->prefix->name, 0);
5807 /* Copy the element type table. */
5809 hashTableIterInit(&iter, &(oldDtd->elementTypes));
5811 for (;;) {
5812 int i;
5813 ELEMENT_TYPE *newE;
5814 const XML_Char *name;
5815 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5816 if (!oldE)
5817 break;
5818 name = poolCopyString(&(newDtd->pool), oldE->name);
5819 if (!name)
5820 return 0;
5821 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
5822 sizeof(ELEMENT_TYPE));
5823 if (!newE)
5824 return 0;
5825 if (oldE->nDefaultAtts) {
5826 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
5827 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
5828 if (!newE->defaultAtts) {
5829 ms->free_fcn(newE);
5830 return 0;
5833 if (oldE->idAtt)
5834 newE->idAtt = (ATTRIBUTE_ID *)
5835 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
5836 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
5837 if (oldE->prefix)
5838 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
5839 oldE->prefix->name, 0);
5840 for (i = 0; i < newE->nDefaultAtts; i++) {
5841 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
5842 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
5843 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
5844 if (oldE->defaultAtts[i].value) {
5845 newE->defaultAtts[i].value
5846 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
5847 if (!newE->defaultAtts[i].value)
5848 return 0;
5850 else
5851 newE->defaultAtts[i].value = NULL;
5855 /* Copy the entity tables. */
5856 if (!copyEntityTable(oldParser,
5857 &(newDtd->generalEntities),
5858 &(newDtd->pool),
5859 &(oldDtd->generalEntities)))
5860 return 0;
5862 #ifdef XML_DTD
5863 if (!copyEntityTable(oldParser,
5864 &(newDtd->paramEntities),
5865 &(newDtd->pool),
5866 &(oldDtd->paramEntities)))
5867 return 0;
5868 newDtd->paramEntityRead = oldDtd->paramEntityRead;
5869 #endif /* XML_DTD */
5871 newDtd->keepProcessing = oldDtd->keepProcessing;
5872 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
5873 newDtd->standalone = oldDtd->standalone;
5875 /* Don't want deep copying for scaffolding */
5876 newDtd->in_eldecl = oldDtd->in_eldecl;
5877 newDtd->scaffold = oldDtd->scaffold;
5878 newDtd->contentStringLen = oldDtd->contentStringLen;
5879 newDtd->scaffSize = oldDtd->scaffSize;
5880 newDtd->scaffLevel = oldDtd->scaffLevel;
5881 newDtd->scaffIndex = oldDtd->scaffIndex;
5883 return 1;
5884 } /* End dtdCopy */
5886 static int
5887 copyEntityTable(XML_Parser oldParser,
5888 HASH_TABLE *newTable,
5889 STRING_POOL *newPool,
5890 const HASH_TABLE *oldTable)
5892 HASH_TABLE_ITER iter;
5893 const XML_Char *cachedOldBase = NULL;
5894 const XML_Char *cachedNewBase = NULL;
5896 hashTableIterInit(&iter, oldTable);
5898 for (;;) {
5899 ENTITY *newE;
5900 const XML_Char *name;
5901 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
5902 if (!oldE)
5903 break;
5904 name = poolCopyString(newPool, oldE->name);
5905 if (!name)
5906 return 0;
5907 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
5908 if (!newE)
5909 return 0;
5910 if (oldE->systemId) {
5911 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
5912 if (!tem)
5913 return 0;
5914 newE->systemId = tem;
5915 if (oldE->base) {
5916 if (oldE->base == cachedOldBase)
5917 newE->base = cachedNewBase;
5918 else {
5919 cachedOldBase = oldE->base;
5920 tem = poolCopyString(newPool, cachedOldBase);
5921 if (!tem)
5922 return 0;
5923 cachedNewBase = newE->base = tem;
5926 if (oldE->publicId) {
5927 tem = poolCopyString(newPool, oldE->publicId);
5928 if (!tem)
5929 return 0;
5930 newE->publicId = tem;
5933 else {
5934 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
5935 oldE->textLen);
5936 if (!tem)
5937 return 0;
5938 newE->textPtr = tem;
5939 newE->textLen = oldE->textLen;
5941 if (oldE->notation) {
5942 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
5943 if (!tem)
5944 return 0;
5945 newE->notation = tem;
5947 newE->is_param = oldE->is_param;
5948 newE->is_internal = oldE->is_internal;
5950 return 1;
5953 #define INIT_POWER 6
5955 static XML_Bool FASTCALL
5956 keyeq(KEY s1, KEY s2)
5958 for (; *s1 == *s2; s1++, s2++)
5959 if (*s1 == 0)
5960 return XML_TRUE;
5961 return XML_FALSE;
5964 static unsigned long FASTCALL
5965 hash(XML_Parser parser, KEY s)
5967 unsigned long h = hash_secret_salt;
5968 while (*s)
5969 h = CHAR_HASH(h, *s++);
5970 return h;
5973 static NAMED *
5974 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
5976 size_t i;
5977 if (table->size == 0) {
5978 size_t tsize;
5979 if (!createSize)
5980 return NULL;
5981 table->power = INIT_POWER;
5982 /* table->size is a power of 2 */
5983 table->size = (size_t)1 << INIT_POWER;
5984 tsize = table->size * sizeof(NAMED *);
5985 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
5986 if (!table->v) {
5987 table->size = 0;
5988 return NULL;
5990 memset(table->v, 0, tsize);
5991 i = hash(parser, name) & ((unsigned long)table->size - 1);
5993 else {
5994 unsigned long h = hash(parser, name);
5995 unsigned long mask = (unsigned long)table->size - 1;
5996 unsigned char step = 0;
5997 i = h & mask;
5998 while (table->v[i]) {
5999 if (keyeq(name, table->v[i]->name))
6000 return table->v[i];
6001 if (!step)
6002 step = PROBE_STEP(h, mask, table->power);
6003 i < step ? (i += table->size - step) : (i -= step);
6005 if (!createSize)
6006 return NULL;
6008 /* check for overflow (table is half full) */
6009 if (table->used >> (table->power - 1)) {
6010 unsigned char newPower = table->power + 1;
6011 size_t newSize = (size_t)1 << newPower;
6012 unsigned long newMask = (unsigned long)newSize - 1;
6013 size_t tsize = newSize * sizeof(NAMED *);
6014 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6015 if (!newV)
6016 return NULL;
6017 memset(newV, 0, tsize);
6018 for (i = 0; i < table->size; i++)
6019 if (table->v[i]) {
6020 unsigned long newHash = hash(parser, table->v[i]->name);
6021 size_t j = newHash & newMask;
6022 step = 0;
6023 while (newV[j]) {
6024 if (!step)
6025 step = PROBE_STEP(newHash, newMask, newPower);
6026 j < step ? (j += newSize - step) : (j -= step);
6028 newV[j] = table->v[i];
6030 table->mem->free_fcn(table->v);
6031 table->v = newV;
6032 table->power = newPower;
6033 table->size = newSize;
6034 i = h & newMask;
6035 step = 0;
6036 while (table->v[i]) {
6037 if (!step)
6038 step = PROBE_STEP(h, newMask, newPower);
6039 i < step ? (i += newSize - step) : (i -= step);
6043 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6044 if (!table->v[i])
6045 return NULL;
6046 memset(table->v[i], 0, createSize);
6047 table->v[i]->name = name;
6048 (table->used)++;
6049 return table->v[i];
6052 static void FASTCALL
6053 hashTableClear(HASH_TABLE *table)
6055 size_t i;
6056 for (i = 0; i < table->size; i++) {
6057 table->mem->free_fcn(table->v[i]);
6058 table->v[i] = NULL;
6060 table->used = 0;
6063 static void FASTCALL
6064 hashTableDestroy(HASH_TABLE *table)
6066 size_t i;
6067 for (i = 0; i < table->size; i++)
6068 table->mem->free_fcn(table->v[i]);
6069 table->mem->free_fcn(table->v);
6072 static void FASTCALL
6073 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
6075 p->power = 0;
6076 p->size = 0;
6077 p->used = 0;
6078 p->v = NULL;
6079 p->mem = ms;
6082 static void FASTCALL
6083 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
6085 iter->p = table->v;
6086 iter->end = iter->p + table->size;
6089 static NAMED * FASTCALL
6090 hashTableIterNext(HASH_TABLE_ITER *iter)
6092 while (iter->p != iter->end) {
6093 NAMED *tem = *(iter->p)++;
6094 if (tem)
6095 return tem;
6097 return NULL;
6100 static void FASTCALL
6101 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
6103 pool->blocks = NULL;
6104 pool->freeBlocks = NULL;
6105 pool->start = NULL;
6106 pool->ptr = NULL;
6107 pool->end = NULL;
6108 pool->mem = ms;
6111 static void FASTCALL
6112 poolClear(STRING_POOL *pool)
6114 if (!pool->freeBlocks)
6115 pool->freeBlocks = pool->blocks;
6116 else {
6117 BLOCK *p = pool->blocks;
6118 while (p) {
6119 BLOCK *tem = p->next;
6120 p->next = pool->freeBlocks;
6121 pool->freeBlocks = p;
6122 p = tem;
6125 pool->blocks = NULL;
6126 pool->start = NULL;
6127 pool->ptr = NULL;
6128 pool->end = NULL;
6131 static void FASTCALL
6132 poolDestroy(STRING_POOL *pool)
6134 BLOCK *p = pool->blocks;
6135 while (p) {
6136 BLOCK *tem = p->next;
6137 pool->mem->free_fcn(p);
6138 p = tem;
6140 p = pool->freeBlocks;
6141 while (p) {
6142 BLOCK *tem = p->next;
6143 pool->mem->free_fcn(p);
6144 p = tem;
6148 static XML_Char *
6149 poolAppend(STRING_POOL *pool, const ENCODING *enc,
6150 const char *ptr, const char *end)
6152 if (!pool->ptr && !poolGrow(pool))
6153 return NULL;
6154 for (;;) {
6155 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6156 if (ptr == end)
6157 break;
6158 if (!poolGrow(pool))
6159 return NULL;
6161 return pool->start;
6164 static const XML_Char * FASTCALL
6165 poolCopyString(STRING_POOL *pool, const XML_Char *s)
6167 do {
6168 if (!poolAppendChar(pool, *s))
6169 return NULL;
6170 } while (*s++);
6171 s = pool->start;
6172 poolFinish(pool);
6173 return s;
6176 static const XML_Char *
6177 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
6179 if (!pool->ptr && !poolGrow(pool))
6180 return NULL;
6181 for (; n > 0; --n, s++) {
6182 if (!poolAppendChar(pool, *s))
6183 return NULL;
6185 s = pool->start;
6186 poolFinish(pool);
6187 return s;
6190 static const XML_Char * FASTCALL
6191 poolAppendString(STRING_POOL *pool, const XML_Char *s)
6193 while (*s) {
6194 if (!poolAppendChar(pool, *s))
6195 return NULL;
6196 s++;
6198 return pool->start;
6201 static XML_Char *
6202 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6203 const char *ptr, const char *end)
6205 if (!poolAppend(pool, enc, ptr, end))
6206 return NULL;
6207 if (pool->ptr == pool->end && !poolGrow(pool))
6208 return NULL;
6209 *(pool->ptr)++ = 0;
6210 return pool->start;
6213 static XML_Bool FASTCALL
6214 poolGrow(STRING_POOL *pool)
6216 if (pool->freeBlocks) {
6217 if (pool->start == 0) {
6218 pool->blocks = pool->freeBlocks;
6219 pool->freeBlocks = pool->freeBlocks->next;
6220 pool->blocks->next = NULL;
6221 pool->start = pool->blocks->s;
6222 pool->end = pool->start + pool->blocks->size;
6223 pool->ptr = pool->start;
6224 return XML_TRUE;
6226 if (pool->end - pool->start < pool->freeBlocks->size) {
6227 BLOCK *tem = pool->freeBlocks->next;
6228 pool->freeBlocks->next = pool->blocks;
6229 pool->blocks = pool->freeBlocks;
6230 pool->freeBlocks = tem;
6231 memcpy(pool->blocks->s, pool->start,
6232 (pool->end - pool->start) * sizeof(XML_Char));
6233 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6234 pool->start = pool->blocks->s;
6235 pool->end = pool->start + pool->blocks->size;
6236 return XML_TRUE;
6239 if (pool->blocks && pool->start == pool->blocks->s) {
6240 int blockSize = (int)(pool->end - pool->start)*2;
6241 BLOCK *temp = (BLOCK *)
6242 pool->mem->realloc_fcn(pool->blocks,
6243 (offsetof(BLOCK, s)
6244 + blockSize * sizeof(XML_Char)));
6245 if (temp == NULL)
6246 return XML_FALSE;
6247 pool->blocks = temp;
6248 pool->blocks->size = blockSize;
6249 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6250 pool->start = pool->blocks->s;
6251 pool->end = pool->start + blockSize;
6253 else {
6254 BLOCK *tem;
6255 int blockSize = (int)(pool->end - pool->start);
6256 if (blockSize < INIT_BLOCK_SIZE)
6257 blockSize = INIT_BLOCK_SIZE;
6258 else
6259 blockSize *= 2;
6260 tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s)
6261 + blockSize * sizeof(XML_Char));
6262 if (!tem)
6263 return XML_FALSE;
6264 tem->size = blockSize;
6265 tem->next = pool->blocks;
6266 pool->blocks = tem;
6267 if (pool->ptr != pool->start)
6268 memcpy(tem->s, pool->start,
6269 (pool->ptr - pool->start) * sizeof(XML_Char));
6270 pool->ptr = tem->s + (pool->ptr - pool->start);
6271 pool->start = tem->s;
6272 pool->end = tem->s + blockSize;
6274 return XML_TRUE;
6277 static int FASTCALL
6278 nextScaffoldPart(XML_Parser parser)
6280 DTD * const dtd = _dtd; /* save one level of indirection */
6281 CONTENT_SCAFFOLD * me;
6282 int next;
6284 if (!dtd->scaffIndex) {
6285 dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int));
6286 if (!dtd->scaffIndex)
6287 return -1;
6288 dtd->scaffIndex[0] = 0;
6291 if (dtd->scaffCount >= dtd->scaffSize) {
6292 CONTENT_SCAFFOLD *temp;
6293 if (dtd->scaffold) {
6294 temp = (CONTENT_SCAFFOLD *)
6295 REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
6296 if (temp == NULL)
6297 return -1;
6298 dtd->scaffSize *= 2;
6300 else {
6301 temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS
6302 * sizeof(CONTENT_SCAFFOLD));
6303 if (temp == NULL)
6304 return -1;
6305 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
6307 dtd->scaffold = temp;
6309 next = dtd->scaffCount++;
6310 me = &dtd->scaffold[next];
6311 if (dtd->scaffLevel) {
6312 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
6313 if (parent->lastchild) {
6314 dtd->scaffold[parent->lastchild].nextsib = next;
6316 if (!parent->childcnt)
6317 parent->firstchild = next;
6318 parent->lastchild = next;
6319 parent->childcnt++;
6321 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
6322 return next;
6325 static void
6326 build_node(XML_Parser parser,
6327 int src_node,
6328 XML_Content *dest,
6329 XML_Content **contpos,
6330 XML_Char **strpos)
6332 DTD * const dtd = _dtd; /* save one level of indirection */
6333 dest->type = dtd->scaffold[src_node].type;
6334 dest->quant = dtd->scaffold[src_node].quant;
6335 if (dest->type == XML_CTYPE_NAME) {
6336 const XML_Char *src;
6337 dest->name = *strpos;
6338 src = dtd->scaffold[src_node].name;
6339 for (;;) {
6340 *(*strpos)++ = *src;
6341 if (!*src)
6342 break;
6343 src++;
6345 dest->numchildren = 0;
6346 dest->children = NULL;
6348 else {
6349 unsigned int i;
6350 int cn;
6351 dest->numchildren = dtd->scaffold[src_node].childcnt;
6352 dest->children = *contpos;
6353 *contpos += dest->numchildren;
6354 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
6355 i < dest->numchildren;
6356 i++, cn = dtd->scaffold[cn].nextsib) {
6357 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
6359 dest->name = NULL;
6363 static XML_Content *
6364 build_model (XML_Parser parser)
6366 DTD * const dtd = _dtd; /* save one level of indirection */
6367 XML_Content *ret;
6368 XML_Content *cpos;
6369 XML_Char * str;
6370 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
6371 + (dtd->contentStringLen * sizeof(XML_Char)));
6373 ret = (XML_Content *)MALLOC(allocsize);
6374 if (!ret)
6375 return NULL;
6377 str = (XML_Char *) (&ret[dtd->scaffCount]);
6378 cpos = &ret[1];
6380 build_node(parser, 0, ret, &cpos, &str);
6381 return ret;
6384 static ELEMENT_TYPE *
6385 getElementType(XML_Parser parser,
6386 const ENCODING *enc,
6387 const char *ptr,
6388 const char *end)
6390 DTD * const dtd = _dtd; /* save one level of indirection */
6391 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
6392 ELEMENT_TYPE *ret;
6394 if (!name)
6395 return NULL;
6396 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
6397 if (!ret)
6398 return NULL;
6399 if (ret->name != name)
6400 poolDiscard(&dtd->pool);
6401 else {
6402 poolFinish(&dtd->pool);
6403 if (!setElementTypePrefix(parser, ret))
6404 return NULL;
6406 return ret;