d3d8/tests: Test the presentation parameters after creating an additional swap chain.
[wine.git] / libs / xml2 / parserInternals.c
blobc36bbfb62c679857df3be4c32a46acf30df6e19e
1 /*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
5 * See Copyright for the status of this software.
7 * daniel@veillard.com
8 */
10 #define IN_LIBXML
11 #include "libxml.h"
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
19 #include <string.h>
20 #include <ctype.h>
21 #include <stdlib.h>
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/valid.h>
28 #include <libxml/entities.h>
29 #include <libxml/xmlerror.h>
30 #include <libxml/encoding.h>
31 #include <libxml/valid.h>
32 #include <libxml/xmlIO.h>
33 #include <libxml/uri.h>
34 #include <libxml/dict.h>
35 #include <libxml/SAX.h>
36 #ifdef LIBXML_CATALOG_ENABLED
37 #include <libxml/catalog.h>
38 #endif
39 #include <libxml/globals.h>
40 #include <libxml/chvalid.h>
42 #define CUR(ctxt) ctxt->input->cur
43 #define END(ctxt) ctxt->input->end
44 #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
46 #include "private/buf.h"
47 #include "private/enc.h"
48 #include "private/error.h"
49 #include "private/io.h"
50 #include "private/parser.h"
53 * Various global defaults for parsing
56 /**
57 * xmlCheckVersion:
58 * @version: the include version number
60 * check the compiled lib version against the include one.
61 * This can warn or immediately kill the application
63 void
64 xmlCheckVersion(int version) {
65 int myversion = LIBXML_VERSION;
67 xmlInitParser();
69 if ((myversion / 10000) != (version / 10000)) {
70 xmlGenericError(xmlGenericErrorContext,
71 "Fatal: program compiled against libxml %d using libxml %d\n",
72 (version / 10000), (myversion / 10000));
73 fprintf(stderr,
74 "Fatal: program compiled against libxml %d using libxml %d\n",
75 (version / 10000), (myversion / 10000));
77 if ((myversion / 100) < (version / 100)) {
78 xmlGenericError(xmlGenericErrorContext,
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
85 /************************************************************************
86 * *
87 * Some factorized error routines *
88 * *
89 ************************************************************************/
92 /**
93 * xmlErrMemory:
94 * @ctxt: an XML parser context
95 * @extra: extra information
97 * Handle a redefinition of attribute error
99 void
100 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
102 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103 (ctxt->instate == XML_PARSER_EOF))
104 return;
105 if (ctxt != NULL) {
106 ctxt->errNo = XML_ERR_NO_MEMORY;
107 ctxt->instate = XML_PARSER_EOF;
108 ctxt->disableSAX = 1;
110 if (extra)
111 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113 NULL, NULL, 0, 0,
114 "Memory allocation failed : %s\n", extra);
115 else
116 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118 NULL, NULL, 0, 0, "Memory allocation failed\n");
122 * __xmlErrEncoding:
123 * @ctxt: an XML parser context
124 * @xmlerr: the error number
125 * @msg: the error message
126 * @str1: an string info
127 * @str2: an string info
129 * Handle an encoding error
131 void
132 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133 const char *msg, const xmlChar * str1, const xmlChar * str2)
135 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136 (ctxt->instate == XML_PARSER_EOF))
137 return;
138 if (ctxt != NULL)
139 ctxt->errNo = xmlerr;
140 __xmlRaiseError(NULL, NULL, NULL,
141 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142 NULL, 0, (const char *) str1, (const char *) str2,
143 NULL, 0, 0, msg, str1, str2);
144 if (ctxt != NULL) {
145 ctxt->wellFormed = 0;
146 if (ctxt->recovery == 0)
147 ctxt->disableSAX = 1;
152 * xmlErrInternal:
153 * @ctxt: an XML parser context
154 * @msg: the error message
155 * @str: error information
157 * Handle an internal error
159 static void LIBXML_ATTR_FORMAT(2,0)
160 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
162 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163 (ctxt->instate == XML_PARSER_EOF))
164 return;
165 if (ctxt != NULL)
166 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167 __xmlRaiseError(NULL, NULL, NULL,
168 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170 0, 0, msg, str);
171 if (ctxt != NULL) {
172 ctxt->wellFormed = 0;
173 if (ctxt->recovery == 0)
174 ctxt->disableSAX = 1;
179 * xmlErrEncodingInt:
180 * @ctxt: an XML parser context
181 * @error: the error number
182 * @msg: the error message
183 * @val: an integer value
185 * n encoding error
187 static void LIBXML_ATTR_FORMAT(3,0)
188 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
189 const char *msg, int val)
191 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192 (ctxt->instate == XML_PARSER_EOF))
193 return;
194 if (ctxt != NULL)
195 ctxt->errNo = error;
196 __xmlRaiseError(NULL, NULL, NULL,
197 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
198 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
199 if (ctxt != NULL) {
200 ctxt->wellFormed = 0;
201 if (ctxt->recovery == 0)
202 ctxt->disableSAX = 1;
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
220 /************************************************************************
222 * Input handling functions for progressive parsing *
224 ************************************************************************/
226 /* #define DEBUG_INPUT */
227 /* #define DEBUG_STACK */
228 /* #define DEBUG_PUSH */
231 /* we need to keep enough input to show errors in context */
232 #define LINE_LEN 80
234 #ifdef DEBUG_INPUT
235 #define CHECK_BUFFER(in) check_buffer(in)
237 static
238 void check_buffer(xmlParserInputPtr in) {
239 if (in->base != xmlBufContent(in->buf->buffer)) {
240 xmlGenericError(xmlGenericErrorContext,
241 "xmlParserInput: base mismatch problem\n");
243 if (in->cur < in->base) {
244 xmlGenericError(xmlGenericErrorContext,
245 "xmlParserInput: cur < base problem\n");
247 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
248 xmlGenericError(xmlGenericErrorContext,
249 "xmlParserInput: cur > base + use problem\n");
251 xmlGenericError(xmlGenericErrorContext,"buffer %p : content %x, cur %d, use %d\n",
252 (void *) in, (int) xmlBufContent(in->buf->buffer),
253 in->cur - in->base, xmlBufUse(in->buf->buffer));
256 #else
257 #define CHECK_BUFFER(in)
258 #endif
262 * xmlHaltParser:
263 * @ctxt: an XML parser context
265 * Blocks further parser processing don't override error
266 * for internal use
268 void
269 xmlHaltParser(xmlParserCtxtPtr ctxt) {
270 if (ctxt == NULL)
271 return;
272 ctxt->instate = XML_PARSER_EOF;
273 ctxt->disableSAX = 1;
274 while (ctxt->inputNr > 1)
275 xmlFreeInputStream(inputPop(ctxt));
276 if (ctxt->input != NULL) {
278 * in case there was a specific allocation deallocate before
279 * overriding base
281 if (ctxt->input->free != NULL) {
282 ctxt->input->free((xmlChar *) ctxt->input->base);
283 ctxt->input->free = NULL;
285 if (ctxt->input->buf != NULL) {
286 xmlFreeParserInputBuffer(ctxt->input->buf);
287 ctxt->input->buf = NULL;
289 ctxt->input->cur = BAD_CAST"";
290 ctxt->input->length = 0;
291 ctxt->input->base = ctxt->input->cur;
292 ctxt->input->end = ctxt->input->cur;
297 * xmlParserInputRead:
298 * @in: an XML parser input
299 * @len: an indicative size for the lookahead
301 * DEPRECATED: This function was internal and is deprecated.
303 * Returns -1 as this is an error to use it.
306 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
307 return(-1);
311 * xmlParserGrow:
312 * @ctxt: an XML parser context
315 xmlParserGrow(xmlParserCtxtPtr ctxt) {
316 xmlParserInputPtr in = ctxt->input;
317 xmlParserInputBufferPtr buf = in->buf;
318 ptrdiff_t curEnd = in->end - in->cur;
319 ptrdiff_t curBase = in->cur - in->base;
320 int ret;
322 if (buf == NULL)
323 return(0);
324 /* Don't grow push parser buffer. */
325 if (ctxt->progressive)
326 return(0);
327 /* Don't grow memory buffers. */
328 if ((buf->encoder == NULL) && (buf->readcallback == NULL))
329 return(0);
331 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
332 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
333 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
334 xmlErrInternal(ctxt, "Huge input lookup", NULL);
335 xmlHaltParser(ctxt);
336 return(-1);
339 if (curEnd >= INPUT_CHUNK)
340 return(0);
342 ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
343 xmlBufSetInputBaseCur(buf->buffer, in, 0, curBase);
345 /* TODO: Get error code from xmlParserInputBufferGrow */
346 if (ret < 0) {
347 xmlErrInternal(ctxt, "Growing input buffer", NULL);
348 xmlHaltParser(ctxt);
351 return(ret);
355 * xmlParserInputGrow:
356 * @in: an XML parser input
357 * @len: an indicative size for the lookahead
359 * DEPRECATED: Don't use.
361 * This function increase the input for the parser. It tries to
362 * preserve pointers to the input buffer, and keep already read data
364 * Returns the amount of char read, or -1 in case of error, 0 indicate the
365 * end of this entity
368 xmlParserInputGrow(xmlParserInputPtr in, int len) {
369 int ret;
370 size_t indx;
372 if ((in == NULL) || (len < 0)) return(-1);
373 #ifdef DEBUG_INPUT
374 xmlGenericError(xmlGenericErrorContext, "Grow\n");
375 #endif
376 if (in->buf == NULL) return(-1);
377 if (in->base == NULL) return(-1);
378 if (in->cur == NULL) return(-1);
379 if (in->buf->buffer == NULL) return(-1);
381 /* Don't grow memory buffers. */
382 if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
383 return(0);
385 CHECK_BUFFER(in);
387 indx = in->cur - in->base;
388 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
390 CHECK_BUFFER(in);
392 return(0);
394 ret = xmlParserInputBufferGrow(in->buf, len);
396 in->base = xmlBufContent(in->buf->buffer);
397 if (in->base == NULL) {
398 in->base = BAD_CAST "";
399 in->cur = in->base;
400 in->end = in->base;
401 return(-1);
403 in->cur = in->base + indx;
404 in->end = xmlBufEnd(in->buf->buffer);
406 CHECK_BUFFER(in);
408 return(ret);
412 * xmlParserShrink:
413 * @ctxt: an XML parser context
415 void
416 xmlParserShrink(xmlParserCtxtPtr ctxt) {
417 xmlParserInputPtr in = ctxt->input;
418 xmlParserInputBufferPtr buf = in->buf;
419 size_t used;
421 /* Don't shrink pull parser memory buffers. */
422 if ((buf == NULL) ||
423 ((ctxt->progressive == 0) &&
424 (buf->encoder == NULL) && (buf->readcallback == NULL)))
425 return;
427 used = in->cur - in->base;
429 * Do not shrink on large buffers whose only a tiny fraction
430 * was consumed
432 if (used > INPUT_CHUNK) {
433 size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
435 if (res > 0) {
436 used -= res;
437 if ((res > ULONG_MAX) ||
438 (in->consumed > ULONG_MAX - (unsigned long)res))
439 in->consumed = ULONG_MAX;
440 else
441 in->consumed += res;
445 xmlBufSetInputBaseCur(buf->buffer, in, 0, used);
449 * xmlParserInputShrink:
450 * @in: an XML parser input
452 * DEPRECATED: Don't use.
454 * This function removes used input for the parser.
456 void
457 xmlParserInputShrink(xmlParserInputPtr in) {
458 size_t used;
459 size_t ret;
461 #ifdef DEBUG_INPUT
462 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
463 #endif
464 if (in == NULL) return;
465 if (in->buf == NULL) return;
466 if (in->base == NULL) return;
467 if (in->cur == NULL) return;
468 if (in->buf->buffer == NULL) return;
470 CHECK_BUFFER(in);
472 used = in->cur - in->base;
474 * Do not shrink on large buffers whose only a tiny fraction
475 * was consumed
477 if (used > INPUT_CHUNK) {
478 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
479 if (ret > 0) {
480 used -= ret;
481 if ((ret > ULONG_MAX) ||
482 (in->consumed > ULONG_MAX - (unsigned long)ret))
483 in->consumed = ULONG_MAX;
484 else
485 in->consumed += ret;
489 if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
490 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
493 in->base = xmlBufContent(in->buf->buffer);
494 if (in->base == NULL) {
495 /* TODO: raise error */
496 in->base = BAD_CAST "";
497 in->cur = in->base;
498 in->end = in->base;
499 return;
501 in->cur = in->base + used;
502 in->end = xmlBufEnd(in->buf->buffer);
504 CHECK_BUFFER(in);
507 /************************************************************************
509 * UTF8 character input and related functions *
511 ************************************************************************/
514 * xmlNextChar:
515 * @ctxt: the XML parser context
517 * DEPRECATED: Internal function, do not use.
519 * Skip to the next char input char.
522 void
523 xmlNextChar(xmlParserCtxtPtr ctxt)
525 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
526 (ctxt->input == NULL))
527 return;
529 if (!(VALID_CTXT(ctxt))) {
530 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
531 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
532 xmlStopParser(ctxt);
533 return;
536 if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
537 if (xmlParserGrow(ctxt) < 0)
538 return;
539 if (ctxt->input->cur >= ctxt->input->end)
540 return;
543 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
544 const unsigned char *cur;
545 unsigned char c;
548 * 2.11 End-of-Line Handling
549 * the literal two-character sequence "#xD#xA" or a standalone
550 * literal #xD, an XML processor must pass to the application
551 * the single character #xA.
553 if (*(ctxt->input->cur) == '\n') {
554 ctxt->input->line++; ctxt->input->col = 1;
555 } else
556 ctxt->input->col++;
559 * We are supposed to handle UTF8, check it's valid
560 * From rfc2044: encoding of the Unicode values on UTF-8:
562 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
563 * 0000 0000-0000 007F 0xxxxxxx
564 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
565 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
567 * Check for the 0x110000 limit too
569 cur = ctxt->input->cur;
571 c = *cur;
572 if (c & 0x80) {
573 size_t avail;
575 if (c == 0xC0)
576 goto encoding_error;
578 avail = ctxt->input->end - ctxt->input->cur;
580 if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
581 goto encoding_error;
582 if ((c & 0xe0) == 0xe0) {
583 unsigned int val;
585 if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
586 goto encoding_error;
587 if ((c & 0xf0) == 0xf0) {
588 if (((c & 0xf8) != 0xf0) ||
589 (avail < 4) || ((cur[3] & 0xc0) != 0x80))
590 goto encoding_error;
591 /* 4-byte code */
592 ctxt->input->cur += 4;
593 val = (cur[0] & 0x7) << 18;
594 val |= (cur[1] & 0x3f) << 12;
595 val |= (cur[2] & 0x3f) << 6;
596 val |= cur[3] & 0x3f;
597 } else {
598 /* 3-byte code */
599 ctxt->input->cur += 3;
600 val = (cur[0] & 0xf) << 12;
601 val |= (cur[1] & 0x3f) << 6;
602 val |= cur[2] & 0x3f;
604 if (((val > 0xd7ff) && (val < 0xe000)) ||
605 ((val > 0xfffd) && (val < 0x10000)) ||
606 (val >= 0x110000)) {
607 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
608 "Char 0x%X out of allowed range\n",
609 val);
611 } else
612 /* 2-byte code */
613 ctxt->input->cur += 2;
614 } else
615 /* 1-byte code */
616 ctxt->input->cur++;
617 } else {
619 * Assume it's a fixed length encoding (1) with
620 * a compatible encoding for the ASCII set, since
621 * XML constructs only use < 128 chars
624 if (*(ctxt->input->cur) == '\n') {
625 ctxt->input->line++; ctxt->input->col = 1;
626 } else
627 ctxt->input->col++;
628 ctxt->input->cur++;
630 return;
631 encoding_error:
633 * If we detect an UTF8 error that probably mean that the
634 * input encoding didn't get properly advertised in the
635 * declaration header. Report the error and switch the encoding
636 * to ISO-Latin-1 (if you don't like this policy, just declare the
637 * encoding !)
639 if ((ctxt == NULL) || (ctxt->input == NULL) ||
640 (ctxt->input->end - ctxt->input->cur < 4)) {
641 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
642 "Input is not proper UTF-8, indicate encoding !\n",
643 NULL, NULL);
644 } else {
645 char buffer[150];
647 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
648 ctxt->input->cur[0], ctxt->input->cur[1],
649 ctxt->input->cur[2], ctxt->input->cur[3]);
650 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
651 "Input is not proper UTF-8, indicate encoding !\n%s",
652 BAD_CAST buffer, NULL);
654 ctxt->charset = XML_CHAR_ENCODING_8859_1;
655 ctxt->input->cur++;
656 return;
660 * xmlCurrentChar:
661 * @ctxt: the XML parser context
662 * @len: pointer to the length of the char read
664 * DEPRECATED: Internal function, do not use.
666 * The current char value, if using UTF-8 this may actually span multiple
667 * bytes in the input buffer. Implement the end of line normalization:
668 * 2.11 End-of-Line Handling
669 * Wherever an external parsed entity or the literal entity value
670 * of an internal parsed entity contains either the literal two-character
671 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
672 * must pass to the application the single character #xA.
673 * This behavior can conveniently be produced by normalizing all
674 * line breaks to #xA on input, before parsing.)
676 * Returns the current char value and its length
680 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
681 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
682 if (ctxt->instate == XML_PARSER_EOF)
683 return(0);
685 if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
686 (xmlParserGrow(ctxt) < 0))
687 return(0);
689 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
690 *len = 1;
691 return(*ctxt->input->cur);
693 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
695 * We are supposed to handle UTF8, check it's valid
696 * From rfc2044: encoding of the Unicode values on UTF-8:
698 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
699 * 0000 0000-0000 007F 0xxxxxxx
700 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
701 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
703 * Check for the 0x110000 limit too
705 const unsigned char *cur = ctxt->input->cur;
706 unsigned char c;
707 unsigned int val;
709 c = *cur;
710 if (c & 0x80) {
711 size_t avail;
713 if (((c & 0x40) == 0) || (c == 0xC0))
714 goto encoding_error;
716 avail = ctxt->input->end - ctxt->input->cur;
718 if (avail < 2)
719 goto incomplete_sequence;
720 if ((cur[1] & 0xc0) != 0x80)
721 goto encoding_error;
722 if ((c & 0xe0) == 0xe0) {
723 if (avail < 3)
724 goto incomplete_sequence;
725 if ((cur[2] & 0xc0) != 0x80)
726 goto encoding_error;
727 if ((c & 0xf0) == 0xf0) {
728 if (avail < 4)
729 goto incomplete_sequence;
730 if (((c & 0xf8) != 0xf0) ||
731 ((cur[3] & 0xc0) != 0x80))
732 goto encoding_error;
733 /* 4-byte code */
734 *len = 4;
735 val = (cur[0] & 0x7) << 18;
736 val |= (cur[1] & 0x3f) << 12;
737 val |= (cur[2] & 0x3f) << 6;
738 val |= cur[3] & 0x3f;
739 if (val < 0x10000)
740 goto encoding_error;
741 } else {
742 /* 3-byte code */
743 *len = 3;
744 val = (cur[0] & 0xf) << 12;
745 val |= (cur[1] & 0x3f) << 6;
746 val |= cur[2] & 0x3f;
747 if (val < 0x800)
748 goto encoding_error;
750 } else {
751 /* 2-byte code */
752 *len = 2;
753 val = (cur[0] & 0x1f) << 6;
754 val |= cur[1] & 0x3f;
755 if (val < 0x80)
756 goto encoding_error;
758 if (!IS_CHAR(val)) {
759 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
760 "Char 0x%X out of allowed range\n", val);
762 return(val);
763 } else {
764 /* 1-byte code */
765 *len = 1;
766 if ((*ctxt->input->cur == 0) &&
767 (ctxt->input->end > ctxt->input->cur)) {
768 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
769 "Char 0x0 out of allowed range\n", 0);
771 if (*ctxt->input->cur == 0xD) {
772 if (ctxt->input->cur[1] == 0xA) {
773 ctxt->input->cur++;
775 return(0xA);
777 return(*ctxt->input->cur);
781 * Assume it's a fixed length encoding (1) with
782 * a compatible encoding for the ASCII set, since
783 * XML constructs only use < 128 chars
785 *len = 1;
786 if (*ctxt->input->cur == 0xD) {
787 if (ctxt->input->cur[1] == 0xA) {
788 ctxt->input->cur++;
790 return(0xA);
792 return(*ctxt->input->cur);
794 encoding_error:
796 * If we detect an UTF8 error that probably mean that the
797 * input encoding didn't get properly advertised in the
798 * declaration header. Report the error and switch the encoding
799 * to ISO-Latin-1 (if you don't like this policy, just declare the
800 * encoding !)
802 if (ctxt->input->end - ctxt->input->cur < 4) {
803 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
804 "Input is not proper UTF-8, indicate encoding !\n",
805 NULL, NULL);
806 } else {
807 char buffer[150];
809 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
810 ctxt->input->cur[0], ctxt->input->cur[1],
811 ctxt->input->cur[2], ctxt->input->cur[3]);
812 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
813 "Input is not proper UTF-8, indicate encoding !\n%s",
814 BAD_CAST buffer, NULL);
816 ctxt->charset = XML_CHAR_ENCODING_8859_1;
817 *len = 1;
818 return(*ctxt->input->cur);
820 incomplete_sequence:
822 * An encoding problem may arise from a truncated input buffer
823 * splitting a character in the middle. In that case do not raise
824 * an error but return 0. This should only happen when push parsing
825 * char data.
827 *len = 0;
828 return(0);
832 * xmlStringCurrentChar:
833 * @ctxt: the XML parser context
834 * @cur: pointer to the beginning of the char
835 * @len: pointer to the length of the char read
837 * DEPRECATED: Internal function, do not use.
839 * The current char value, if using UTF-8 this may actually span multiple
840 * bytes in the input buffer.
842 * Returns the current char value and its length
846 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
848 if ((len == NULL) || (cur == NULL)) return(0);
849 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
851 * We are supposed to handle UTF8, check it's valid
852 * From rfc2044: encoding of the Unicode values on UTF-8:
854 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
855 * 0000 0000-0000 007F 0xxxxxxx
856 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
857 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
859 * Check for the 0x110000 limit too
861 unsigned char c;
862 unsigned int val;
864 c = *cur;
865 if (c & 0x80) {
866 if ((cur[1] & 0xc0) != 0x80)
867 goto encoding_error;
868 if ((c & 0xe0) == 0xe0) {
870 if ((cur[2] & 0xc0) != 0x80)
871 goto encoding_error;
872 if ((c & 0xf0) == 0xf0) {
873 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
874 goto encoding_error;
875 /* 4-byte code */
876 *len = 4;
877 val = (cur[0] & 0x7) << 18;
878 val |= (cur[1] & 0x3f) << 12;
879 val |= (cur[2] & 0x3f) << 6;
880 val |= cur[3] & 0x3f;
881 } else {
882 /* 3-byte code */
883 *len = 3;
884 val = (cur[0] & 0xf) << 12;
885 val |= (cur[1] & 0x3f) << 6;
886 val |= cur[2] & 0x3f;
888 } else {
889 /* 2-byte code */
890 *len = 2;
891 val = (cur[0] & 0x1f) << 6;
892 val |= cur[1] & 0x3f;
894 if (!IS_CHAR(val)) {
895 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
896 "Char 0x%X out of allowed range\n", val);
898 return (val);
899 } else {
900 /* 1-byte code */
901 *len = 1;
902 return (*cur);
906 * Assume it's a fixed length encoding (1) with
907 * a compatible encoding for the ASCII set, since
908 * XML constructs only use < 128 chars
910 *len = 1;
911 return (*cur);
912 encoding_error:
915 * An encoding problem may arise from a truncated input buffer
916 * splitting a character in the middle. In that case do not raise
917 * an error but return 0 to indicate an end of stream problem
919 if ((ctxt == NULL) || (ctxt->input == NULL) ||
920 (ctxt->input->end - ctxt->input->cur < 4)) {
921 *len = 0;
922 return(0);
925 * If we detect an UTF8 error that probably mean that the
926 * input encoding didn't get properly advertised in the
927 * declaration header. Report the error and switch the encoding
928 * to ISO-Latin-1 (if you don't like this policy, just declare the
929 * encoding !)
932 char buffer[150];
934 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
935 ctxt->input->cur[0], ctxt->input->cur[1],
936 ctxt->input->cur[2], ctxt->input->cur[3]);
937 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
938 "Input is not proper UTF-8, indicate encoding !\n%s",
939 BAD_CAST buffer, NULL);
941 *len = 1;
942 return (*cur);
946 * xmlCopyCharMultiByte:
947 * @out: pointer to an array of xmlChar
948 * @val: the char value
950 * append the char value in the array
952 * Returns the number of xmlChar written
955 xmlCopyCharMultiByte(xmlChar *out, int val) {
956 if ((out == NULL) || (val < 0)) return(0);
958 * We are supposed to handle UTF8, check it's valid
959 * From rfc2044: encoding of the Unicode values on UTF-8:
961 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
962 * 0000 0000-0000 007F 0xxxxxxx
963 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
964 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
966 if (val >= 0x80) {
967 xmlChar *savedout = out;
968 int bits;
969 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
970 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
971 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
972 else {
973 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
974 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
975 val);
976 return(0);
978 for ( ; bits >= 0; bits-= 6)
979 *out++= ((val >> bits) & 0x3F) | 0x80 ;
980 return (out - savedout);
982 *out = val;
983 return 1;
987 * xmlCopyChar:
988 * @len: Ignored, compatibility
989 * @out: pointer to an array of xmlChar
990 * @val: the char value
992 * append the char value in the array
994 * Returns the number of xmlChar written
998 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
999 if ((out == NULL) || (val < 0)) return(0);
1000 /* the len parameter is ignored */
1001 if (val >= 0x80) {
1002 return(xmlCopyCharMultiByte (out, val));
1004 *out = val;
1005 return 1;
1008 /************************************************************************
1010 * Commodity functions to switch encodings *
1012 ************************************************************************/
1014 static xmlCharEncodingHandlerPtr
1015 xmlDetectEBCDIC(xmlParserInputPtr input) {
1016 xmlChar out[200];
1017 xmlCharEncodingHandlerPtr handler;
1018 int inlen, outlen, res, i;
1021 * To detect the EBCDIC code page, we convert the first 200 bytes
1022 * to EBCDIC-US and try to find the encoding declaration.
1024 handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1025 if (handler == NULL)
1026 return(NULL);
1027 outlen = sizeof(out) - 1;
1028 inlen = input->end - input->cur;
1029 res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen, 0);
1030 if (res < 0)
1031 return(handler);
1032 out[outlen] = 0;
1034 for (i = 0; i < outlen; i++) {
1035 if (out[i] == '>')
1036 break;
1037 if ((out[i] == 'e') &&
1038 (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1039 int start, cur, quote;
1041 i += 8;
1042 while (IS_BLANK_CH(out[i]))
1043 i += 1;
1044 if (out[i++] != '=')
1045 break;
1046 while (IS_BLANK_CH(out[i]))
1047 i += 1;
1048 quote = out[i++];
1049 if ((quote != '\'') && (quote != '"'))
1050 break;
1051 start = i;
1052 cur = out[i];
1053 while (((cur >= 'a') && (cur <= 'z')) ||
1054 ((cur >= 'A') && (cur <= 'Z')) ||
1055 ((cur >= '0') && (cur <= '9')) ||
1056 (cur == '.') || (cur == '_') ||
1057 (cur == '-'))
1058 cur = out[++i];
1059 if (cur != quote)
1060 break;
1061 out[i] = 0;
1062 xmlCharEncCloseFunc(handler);
1063 handler = xmlFindCharEncodingHandler((char *) out + start);
1064 break;
1068 return(handler);
1072 * xmlSwitchEncoding:
1073 * @ctxt: the parser context
1074 * @enc: the encoding value (number)
1076 * change the input functions when discovering the character encoding
1077 * of a given entity.
1079 * Returns 0 in case of success, -1 otherwise
1082 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1084 xmlCharEncodingHandlerPtr handler;
1085 int ret;
1087 if (ctxt == NULL) return(-1);
1090 * FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1092 * Note that we look for a decoded UTF-8 BOM when switching to UTF-16.
1093 * This is mostly useless but Webkit/Chromium relies on this behavior.
1094 * See https://bugs.chromium.org/p/chromium/issues/detail?id=1451026
1096 if ((ctxt->input != NULL) &&
1097 (ctxt->input->consumed == 0) &&
1098 (ctxt->input->cur != NULL) &&
1099 (ctxt->input->cur == ctxt->input->base) &&
1100 ((enc == XML_CHAR_ENCODING_UTF8) ||
1101 (enc == XML_CHAR_ENCODING_UTF16LE) ||
1102 (enc == XML_CHAR_ENCODING_UTF16BE))) {
1104 * Errata on XML-1.0 June 20 2001
1105 * Specific handling of the Byte Order Mark for
1106 * UTF-8
1108 if ((ctxt->input->cur[0] == 0xEF) &&
1109 (ctxt->input->cur[1] == 0xBB) &&
1110 (ctxt->input->cur[2] == 0xBF)) {
1111 ctxt->input->cur += 3;
1115 switch (enc) {
1116 case XML_CHAR_ENCODING_ERROR:
1117 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
1118 "encoding unknown\n", NULL, NULL);
1119 return(-1);
1120 case XML_CHAR_ENCODING_NONE:
1121 /* let's assume it's UTF-8 without the XML decl */
1122 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1123 return(0);
1124 case XML_CHAR_ENCODING_UTF8:
1125 /* default encoding, no conversion should be needed */
1126 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1127 return(0);
1128 case XML_CHAR_ENCODING_EBCDIC:
1129 handler = xmlDetectEBCDIC(ctxt->input);
1130 break;
1131 default:
1132 handler = xmlGetCharEncodingHandler(enc);
1133 break;
1135 if (handler == NULL) {
1137 * Default handlers.
1139 switch (enc) {
1140 case XML_CHAR_ENCODING_ASCII:
1141 /* default encoding, no conversion should be needed */
1142 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1143 return(0);
1144 case XML_CHAR_ENCODING_8859_1:
1145 if ((ctxt->inputNr == 1) &&
1146 (ctxt->encoding == NULL) &&
1147 (ctxt->input != NULL) &&
1148 (ctxt->input->encoding != NULL)) {
1149 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1151 ctxt->charset = enc;
1152 return(0);
1153 default:
1154 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1155 "encoding not supported: %s\n",
1156 BAD_CAST xmlGetCharEncodingName(enc), NULL);
1158 * TODO: We could recover from errors in external entities
1159 * if we didn't stop the parser. But most callers of this
1160 * function don't check the return value.
1162 xmlStopParser(ctxt);
1163 return(-1);
1166 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1167 if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1169 * on encoding conversion errors, stop the parser
1171 xmlStopParser(ctxt);
1172 ctxt->errNo = XML_I18N_CONV_FAILED;
1174 return(ret);
1178 * xmlSwitchInputEncoding:
1179 * @ctxt: the parser context
1180 * @input: the input stream
1181 * @handler: the encoding handler
1183 * change the input functions when discovering the character encoding
1184 * of a given entity.
1186 * Returns 0 in case of success, -1 otherwise
1189 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1190 xmlCharEncodingHandlerPtr handler)
1192 int nbchars;
1193 xmlParserInputBufferPtr in;
1195 if (handler == NULL)
1196 return (-1);
1197 if (input == NULL)
1198 return (-1);
1199 in = input->buf;
1200 if (in == NULL) {
1201 xmlErrInternal(ctxt,
1202 "static memory buffer doesn't support encoding\n", NULL);
1204 * Callers assume that the input buffer takes ownership of the
1205 * encoding handler. xmlCharEncCloseFunc frees unregistered
1206 * handlers and avoids a memory leak.
1208 xmlCharEncCloseFunc(handler);
1209 return (-1);
1212 if (in->encoder != NULL) {
1213 if (in->encoder == handler)
1214 return (0);
1217 * Switching encodings during parsing is a really bad idea,
1218 * but Chromium can switch between ISO-8859-1 and UTF-16 before
1219 * separate calls to xmlParseChunk.
1221 * TODO: We should check whether the "raw" input buffer is empty and
1222 * convert the old content using the old encoder.
1225 xmlCharEncCloseFunc(in->encoder);
1226 in->encoder = handler;
1227 return (0);
1230 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1231 in->encoder = handler;
1234 * Is there already some content down the pipe to convert ?
1236 if (xmlBufIsEmpty(in->buffer) == 0) {
1237 size_t processed, use, consumed;
1240 * FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1244 * Specific handling of the Byte Order Mark for
1245 * UTF-16
1247 if ((handler->name != NULL) &&
1248 (!strcmp(handler->name, "UTF-16LE") ||
1249 !strcmp(handler->name, "UTF-16")) &&
1250 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1251 input->cur += 2;
1253 if ((handler->name != NULL) &&
1254 (!strcmp(handler->name, "UTF-16BE")) &&
1255 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1256 input->cur += 2;
1259 * Errata on XML-1.0 June 20 2001
1260 * Specific handling of the Byte Order Mark for
1261 * UTF-8
1263 if ((handler->name != NULL) &&
1264 (!strcmp(handler->name, "UTF-8")) &&
1265 (input->cur[0] == 0xEF) &&
1266 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1267 input->cur += 3;
1271 * Shrink the current input buffer.
1272 * Move it as the raw buffer and create a new input buffer
1274 processed = input->cur - input->base;
1275 xmlBufShrink(in->buffer, processed);
1276 input->consumed += processed;
1277 in->raw = in->buffer;
1278 in->buffer = xmlBufCreate();
1279 in->rawconsumed = processed;
1280 use = xmlBufUse(in->raw);
1283 * TODO: We must flush and decode the whole buffer to make functions
1284 * like xmlReadMemory work with a user-provided encoding. If the
1285 * encoding is specified directly, we should probably set
1286 * XML_PARSE_IGNORE_ENC in xmlDoRead to avoid switching encodings
1287 * twice. Then we could set "flush" to false which should save
1288 * a considerable amount of memory when parsing from memory.
1289 * It's probably even possible to remove this whole if-block
1290 * completely.
1292 nbchars = xmlCharEncInput(in, 1);
1293 xmlBufResetInput(in->buffer, input);
1294 if (nbchars < 0) {
1295 /* TODO: This could be an out of memory or an encoding error. */
1296 xmlErrInternal(ctxt,
1297 "switching encoding: encoder error\n",
1298 NULL);
1299 xmlHaltParser(ctxt);
1300 return (-1);
1302 consumed = use - xmlBufUse(in->raw);
1303 if ((consumed > ULONG_MAX) ||
1304 (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))
1305 in->rawconsumed = ULONG_MAX;
1306 else
1307 in->rawconsumed += consumed;
1309 return (0);
1313 * xmlSwitchToEncoding:
1314 * @ctxt: the parser context
1315 * @handler: the encoding handler
1317 * change the input functions when discovering the character encoding
1318 * of a given entity.
1320 * Returns 0 in case of success, -1 otherwise
1323 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1325 if (ctxt == NULL)
1326 return(-1);
1327 return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1330 /************************************************************************
1332 * Commodity functions to handle entities processing *
1334 ************************************************************************/
1337 * xmlFreeInputStream:
1338 * @input: an xmlParserInputPtr
1340 * Free up an input stream.
1342 void
1343 xmlFreeInputStream(xmlParserInputPtr input) {
1344 if (input == NULL) return;
1346 if (input->filename != NULL) xmlFree((char *) input->filename);
1347 if (input->directory != NULL) xmlFree((char *) input->directory);
1348 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1349 if (input->version != NULL) xmlFree((char *) input->version);
1350 if ((input->free != NULL) && (input->base != NULL))
1351 input->free((xmlChar *) input->base);
1352 if (input->buf != NULL)
1353 xmlFreeParserInputBuffer(input->buf);
1354 xmlFree(input);
1358 * xmlNewInputStream:
1359 * @ctxt: an XML parser context
1361 * Create a new input stream structure.
1363 * Returns the new input stream or NULL
1365 xmlParserInputPtr
1366 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1367 xmlParserInputPtr input;
1369 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1370 if (input == NULL) {
1371 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1372 return(NULL);
1374 memset(input, 0, sizeof(xmlParserInput));
1375 input->line = 1;
1376 input->col = 1;
1377 input->standalone = -1;
1380 * If the context is NULL the id cannot be initialized, but that
1381 * should not happen while parsing which is the situation where
1382 * the id is actually needed.
1384 if (ctxt != NULL) {
1385 if (input->id >= INT_MAX) {
1386 xmlErrMemory(ctxt, "Input ID overflow\n");
1387 return(NULL);
1389 input->id = ctxt->input_id++;
1392 return(input);
1396 * xmlNewIOInputStream:
1397 * @ctxt: an XML parser context
1398 * @input: an I/O Input
1399 * @enc: the charset encoding if known
1401 * Create a new input stream structure encapsulating the @input into
1402 * a stream suitable for the parser.
1404 * Returns the new input stream or NULL
1406 xmlParserInputPtr
1407 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1408 xmlCharEncoding enc) {
1409 xmlParserInputPtr inputStream;
1411 if (input == NULL) return(NULL);
1412 if (xmlParserDebugEntities)
1413 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1414 inputStream = xmlNewInputStream(ctxt);
1415 if (inputStream == NULL) {
1416 return(NULL);
1418 inputStream->filename = NULL;
1419 inputStream->buf = input;
1420 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1422 if (enc != XML_CHAR_ENCODING_NONE) {
1423 xmlSwitchEncoding(ctxt, enc);
1426 return(inputStream);
1430 * xmlNewEntityInputStream:
1431 * @ctxt: an XML parser context
1432 * @entity: an Entity pointer
1434 * DEPRECATED: Internal function, do not use.
1436 * Create a new input stream based on an xmlEntityPtr
1438 * Returns the new input stream or NULL
1440 xmlParserInputPtr
1441 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1442 xmlParserInputPtr input;
1444 if (entity == NULL) {
1445 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1446 NULL);
1447 return(NULL);
1449 if (xmlParserDebugEntities)
1450 xmlGenericError(xmlGenericErrorContext,
1451 "new input from entity: %s\n", entity->name);
1452 if (entity->content == NULL) {
1453 switch (entity->etype) {
1454 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1455 xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1456 entity->name);
1457 break;
1458 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1459 case XML_EXTERNAL_PARAMETER_ENTITY:
1460 input = xmlLoadExternalEntity((char *) entity->URI,
1461 (char *) entity->ExternalID, ctxt);
1462 if (input != NULL)
1463 input->entity = entity;
1464 return(input);
1465 case XML_INTERNAL_GENERAL_ENTITY:
1466 xmlErrInternal(ctxt,
1467 "Internal entity %s without content !\n",
1468 entity->name);
1469 break;
1470 case XML_INTERNAL_PARAMETER_ENTITY:
1471 xmlErrInternal(ctxt,
1472 "Internal parameter entity %s without content !\n",
1473 entity->name);
1474 break;
1475 case XML_INTERNAL_PREDEFINED_ENTITY:
1476 xmlErrInternal(ctxt,
1477 "Predefined entity %s without content !\n",
1478 entity->name);
1479 break;
1481 return(NULL);
1483 input = xmlNewInputStream(ctxt);
1484 if (input == NULL) {
1485 return(NULL);
1487 if (entity->URI != NULL)
1488 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1489 input->base = entity->content;
1490 if (entity->length == 0)
1491 entity->length = xmlStrlen(entity->content);
1492 input->cur = entity->content;
1493 input->length = entity->length;
1494 input->end = &entity->content[input->length];
1495 input->entity = entity;
1496 return(input);
1500 * xmlNewStringInputStream:
1501 * @ctxt: an XML parser context
1502 * @buffer: an memory buffer
1504 * Create a new input stream based on a memory buffer.
1505 * Returns the new input stream
1507 xmlParserInputPtr
1508 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1509 xmlParserInputPtr input;
1510 xmlParserInputBufferPtr buf;
1512 if (buffer == NULL) {
1513 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1514 NULL);
1515 return(NULL);
1517 if (xmlParserDebugEntities)
1518 xmlGenericError(xmlGenericErrorContext,
1519 "new fixed input: %.30s\n", buffer);
1520 buf = xmlParserInputBufferCreateMem((const char *) buffer,
1521 xmlStrlen(buffer),
1522 XML_CHAR_ENCODING_NONE);
1523 if (buf == NULL) {
1524 xmlErrMemory(ctxt, NULL);
1525 return(NULL);
1527 input = xmlNewInputStream(ctxt);
1528 if (input == NULL) {
1529 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1530 xmlFreeParserInputBuffer(buf);
1531 return(NULL);
1533 input->buf = buf;
1534 xmlBufResetInput(input->buf->buffer, input);
1535 return(input);
1539 * xmlNewInputFromFile:
1540 * @ctxt: an XML parser context
1541 * @filename: the filename to use as entity
1543 * Create a new input stream based on a file or an URL.
1545 * Returns the new input stream or NULL in case of error
1547 xmlParserInputPtr
1548 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1549 xmlParserInputBufferPtr buf;
1550 xmlParserInputPtr inputStream;
1551 char *directory = NULL;
1552 xmlChar *URI = NULL;
1554 if (xmlParserDebugEntities)
1555 xmlGenericError(xmlGenericErrorContext,
1556 "new input from file: %s\n", filename);
1557 if (ctxt == NULL) return(NULL);
1558 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1559 if (buf == NULL) {
1560 if (filename == NULL)
1561 __xmlLoaderErr(ctxt,
1562 "failed to load external entity: NULL filename \n",
1563 NULL);
1564 else
1565 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1566 (const char *) filename);
1567 return(NULL);
1570 inputStream = xmlNewInputStream(ctxt);
1571 if (inputStream == NULL) {
1572 xmlFreeParserInputBuffer(buf);
1573 return(NULL);
1576 inputStream->buf = buf;
1577 inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1578 if (inputStream == NULL)
1579 return(NULL);
1581 if (inputStream->filename == NULL)
1582 URI = xmlStrdup((xmlChar *) filename);
1583 else
1584 URI = xmlStrdup((xmlChar *) inputStream->filename);
1585 directory = xmlParserGetDirectory((const char *) URI);
1586 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1587 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1588 if (URI != NULL) xmlFree((char *) URI);
1589 inputStream->directory = directory;
1591 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1592 if ((ctxt->directory == NULL) && (directory != NULL))
1593 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1594 return(inputStream);
1597 /************************************************************************
1599 * Commodity functions to handle parser contexts *
1601 ************************************************************************/
1604 * xmlInitSAXParserCtxt:
1605 * @ctxt: XML parser context
1606 * @sax: SAX handlert
1607 * @userData: user data
1609 * Initialize a SAX parser context
1611 * Returns 0 in case of success and -1 in case of error
1614 static int
1615 xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1616 void *userData)
1618 xmlParserInputPtr input;
1620 if(ctxt==NULL) {
1621 xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1622 return(-1);
1625 xmlInitParser();
1627 if (ctxt->dict == NULL)
1628 ctxt->dict = xmlDictCreate();
1629 if (ctxt->dict == NULL) {
1630 xmlErrMemory(NULL, "cannot initialize parser context\n");
1631 return(-1);
1633 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1635 if (ctxt->sax == NULL)
1636 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1637 if (ctxt->sax == NULL) {
1638 xmlErrMemory(NULL, "cannot initialize parser context\n");
1639 return(-1);
1641 if (sax == NULL) {
1642 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1643 xmlSAXVersion(ctxt->sax, 2);
1644 ctxt->userData = ctxt;
1645 } else {
1646 if (sax->initialized == XML_SAX2_MAGIC) {
1647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1648 } else {
1649 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1650 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1652 ctxt->userData = userData ? userData : ctxt;
1655 ctxt->maxatts = 0;
1656 ctxt->atts = NULL;
1657 /* Allocate the Input stack */
1658 if (ctxt->inputTab == NULL) {
1659 ctxt->inputTab = (xmlParserInputPtr *)
1660 xmlMalloc(5 * sizeof(xmlParserInputPtr));
1661 ctxt->inputMax = 5;
1663 if (ctxt->inputTab == NULL) {
1664 xmlErrMemory(NULL, "cannot initialize parser context\n");
1665 ctxt->inputNr = 0;
1666 ctxt->inputMax = 0;
1667 ctxt->input = NULL;
1668 return(-1);
1670 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1671 xmlFreeInputStream(input);
1673 ctxt->inputNr = 0;
1674 ctxt->input = NULL;
1676 ctxt->version = NULL;
1677 ctxt->encoding = NULL;
1678 ctxt->standalone = -1;
1679 ctxt->hasExternalSubset = 0;
1680 ctxt->hasPErefs = 0;
1681 ctxt->html = 0;
1682 ctxt->external = 0;
1683 ctxt->instate = XML_PARSER_START;
1684 ctxt->token = 0;
1685 ctxt->directory = NULL;
1687 /* Allocate the Node stack */
1688 if (ctxt->nodeTab == NULL) {
1689 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1690 ctxt->nodeMax = 10;
1692 if (ctxt->nodeTab == NULL) {
1693 xmlErrMemory(NULL, "cannot initialize parser context\n");
1694 ctxt->nodeNr = 0;
1695 ctxt->nodeMax = 0;
1696 ctxt->node = NULL;
1697 ctxt->inputNr = 0;
1698 ctxt->inputMax = 0;
1699 ctxt->input = NULL;
1700 return(-1);
1702 ctxt->nodeNr = 0;
1703 ctxt->node = NULL;
1705 /* Allocate the Name stack */
1706 if (ctxt->nameTab == NULL) {
1707 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1708 ctxt->nameMax = 10;
1710 if (ctxt->nameTab == NULL) {
1711 xmlErrMemory(NULL, "cannot initialize parser context\n");
1712 ctxt->nodeNr = 0;
1713 ctxt->nodeMax = 0;
1714 ctxt->node = NULL;
1715 ctxt->inputNr = 0;
1716 ctxt->inputMax = 0;
1717 ctxt->input = NULL;
1718 ctxt->nameNr = 0;
1719 ctxt->nameMax = 0;
1720 ctxt->name = NULL;
1721 return(-1);
1723 ctxt->nameNr = 0;
1724 ctxt->name = NULL;
1726 /* Allocate the space stack */
1727 if (ctxt->spaceTab == NULL) {
1728 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1729 ctxt->spaceMax = 10;
1731 if (ctxt->spaceTab == NULL) {
1732 xmlErrMemory(NULL, "cannot initialize parser context\n");
1733 ctxt->nodeNr = 0;
1734 ctxt->nodeMax = 0;
1735 ctxt->node = NULL;
1736 ctxt->inputNr = 0;
1737 ctxt->inputMax = 0;
1738 ctxt->input = NULL;
1739 ctxt->nameNr = 0;
1740 ctxt->nameMax = 0;
1741 ctxt->name = NULL;
1742 ctxt->spaceNr = 0;
1743 ctxt->spaceMax = 0;
1744 ctxt->space = NULL;
1745 return(-1);
1747 ctxt->spaceNr = 1;
1748 ctxt->spaceMax = 10;
1749 ctxt->spaceTab[0] = -1;
1750 ctxt->space = &ctxt->spaceTab[0];
1751 ctxt->myDoc = NULL;
1752 ctxt->wellFormed = 1;
1753 ctxt->nsWellFormed = 1;
1754 ctxt->valid = 1;
1755 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1756 if (ctxt->loadsubset) {
1757 ctxt->options |= XML_PARSE_DTDLOAD;
1759 ctxt->validate = xmlDoValidityCheckingDefaultValue;
1760 ctxt->pedantic = xmlPedanticParserDefaultValue;
1761 if (ctxt->pedantic) {
1762 ctxt->options |= XML_PARSE_PEDANTIC;
1764 ctxt->linenumbers = xmlLineNumbersDefaultValue;
1765 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1766 if (ctxt->keepBlanks == 0) {
1767 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1768 ctxt->options |= XML_PARSE_NOBLANKS;
1771 ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1772 ctxt->vctxt.userData = ctxt;
1773 ctxt->vctxt.error = xmlParserValidityError;
1774 ctxt->vctxt.warning = xmlParserValidityWarning;
1775 if (ctxt->validate) {
1776 if (xmlGetWarningsDefaultValue == 0)
1777 ctxt->vctxt.warning = NULL;
1778 else
1779 ctxt->vctxt.warning = xmlParserValidityWarning;
1780 ctxt->vctxt.nodeMax = 0;
1781 ctxt->options |= XML_PARSE_DTDVALID;
1783 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1784 if (ctxt->replaceEntities) {
1785 ctxt->options |= XML_PARSE_NOENT;
1787 ctxt->record_info = 0;
1788 ctxt->checkIndex = 0;
1789 ctxt->inSubset = 0;
1790 ctxt->errNo = XML_ERR_OK;
1791 ctxt->depth = 0;
1792 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1793 ctxt->catalogs = NULL;
1794 ctxt->sizeentities = 0;
1795 ctxt->sizeentcopy = 0;
1796 ctxt->input_id = 1;
1797 xmlInitNodeInfoSeq(&ctxt->node_seq);
1798 return(0);
1802 * xmlInitParserCtxt:
1803 * @ctxt: an XML parser context
1805 * DEPRECATED: Internal function which will be made private in a future
1806 * version.
1808 * Initialize a parser context
1810 * Returns 0 in case of success and -1 in case of error
1814 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1816 return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1820 * xmlFreeParserCtxt:
1821 * @ctxt: an XML parser context
1823 * Free all the memory used by a parser context. However the parsed
1824 * document in ctxt->myDoc is not freed.
1827 void
1828 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1830 xmlParserInputPtr input;
1832 if (ctxt == NULL) return;
1834 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1835 xmlFreeInputStream(input);
1837 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1838 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1839 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1840 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1841 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1842 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1843 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1844 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1845 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1846 #ifdef LIBXML_SAX1_ENABLED
1847 if ((ctxt->sax != NULL) &&
1848 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1849 #else
1850 if (ctxt->sax != NULL)
1851 #endif /* LIBXML_SAX1_ENABLED */
1852 xmlFree(ctxt->sax);
1853 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1854 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1855 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1856 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1857 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1858 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1859 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1860 if (ctxt->attsDefault != NULL)
1861 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1862 if (ctxt->attsSpecial != NULL)
1863 xmlHashFree(ctxt->attsSpecial, NULL);
1864 if (ctxt->freeElems != NULL) {
1865 xmlNodePtr cur, next;
1867 cur = ctxt->freeElems;
1868 while (cur != NULL) {
1869 next = cur->next;
1870 xmlFree(cur);
1871 cur = next;
1874 if (ctxt->freeAttrs != NULL) {
1875 xmlAttrPtr cur, next;
1877 cur = ctxt->freeAttrs;
1878 while (cur != NULL) {
1879 next = cur->next;
1880 xmlFree(cur);
1881 cur = next;
1885 * cleanup the error strings
1887 if (ctxt->lastError.message != NULL)
1888 xmlFree(ctxt->lastError.message);
1889 if (ctxt->lastError.file != NULL)
1890 xmlFree(ctxt->lastError.file);
1891 if (ctxt->lastError.str1 != NULL)
1892 xmlFree(ctxt->lastError.str1);
1893 if (ctxt->lastError.str2 != NULL)
1894 xmlFree(ctxt->lastError.str2);
1895 if (ctxt->lastError.str3 != NULL)
1896 xmlFree(ctxt->lastError.str3);
1898 #ifdef LIBXML_CATALOG_ENABLED
1899 if (ctxt->catalogs != NULL)
1900 xmlCatalogFreeLocal(ctxt->catalogs);
1901 #endif
1902 xmlFree(ctxt);
1906 * xmlNewParserCtxt:
1908 * Allocate and initialize a new parser context.
1910 * Returns the xmlParserCtxtPtr or NULL
1913 xmlParserCtxtPtr
1914 xmlNewParserCtxt(void)
1916 return(xmlNewSAXParserCtxt(NULL, NULL));
1920 * xmlNewSAXParserCtxt:
1921 * @sax: SAX handler
1922 * @userData: user data
1924 * Allocate and initialize a new SAX parser context. If userData is NULL,
1925 * the parser context will be passed as user data.
1927 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
1930 xmlParserCtxtPtr
1931 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
1933 xmlParserCtxtPtr ctxt;
1935 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1936 if (ctxt == NULL) {
1937 xmlErrMemory(NULL, "cannot allocate parser context\n");
1938 return(NULL);
1940 memset(ctxt, 0, sizeof(xmlParserCtxt));
1941 if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
1942 xmlFreeParserCtxt(ctxt);
1943 return(NULL);
1945 return(ctxt);
1948 /************************************************************************
1950 * Handling of node information *
1952 ************************************************************************/
1955 * xmlClearParserCtxt:
1956 * @ctxt: an XML parser context
1958 * Clear (release owned resources) and reinitialize a parser context
1961 void
1962 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1964 if (ctxt==NULL)
1965 return;
1966 xmlClearNodeInfoSeq(&ctxt->node_seq);
1967 xmlCtxtReset(ctxt);
1972 * xmlParserFindNodeInfo:
1973 * @ctx: an XML parser context
1974 * @node: an XML node within the tree
1976 * DEPRECATED: Don't use.
1978 * Find the parser node info struct for a given node
1980 * Returns an xmlParserNodeInfo block pointer or NULL
1982 const xmlParserNodeInfo *
1983 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1985 unsigned long pos;
1987 if ((ctx == NULL) || (node == NULL))
1988 return (NULL);
1989 /* Find position where node should be at */
1990 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1991 if (pos < ctx->node_seq.length
1992 && ctx->node_seq.buffer[pos].node == node)
1993 return &ctx->node_seq.buffer[pos];
1994 else
1995 return NULL;
2000 * xmlInitNodeInfoSeq:
2001 * @seq: a node info sequence pointer
2003 * DEPRECATED: Don't use.
2005 * -- Initialize (set to initial state) node info sequence
2007 void
2008 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2010 if (seq == NULL)
2011 return;
2012 seq->length = 0;
2013 seq->maximum = 0;
2014 seq->buffer = NULL;
2018 * xmlClearNodeInfoSeq:
2019 * @seq: a node info sequence pointer
2021 * DEPRECATED: Don't use.
2023 * -- Clear (release memory and reinitialize) node
2024 * info sequence
2026 void
2027 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2029 if (seq == NULL)
2030 return;
2031 if (seq->buffer != NULL)
2032 xmlFree(seq->buffer);
2033 xmlInitNodeInfoSeq(seq);
2037 * xmlParserFindNodeInfoIndex:
2038 * @seq: a node info sequence pointer
2039 * @node: an XML node pointer
2041 * DEPRECATED: Don't use.
2043 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2044 * the given node is or should be at in a sorted sequence
2046 * Returns a long indicating the position of the record
2048 unsigned long
2049 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2050 const xmlNodePtr node)
2052 unsigned long upper, lower, middle;
2053 int found = 0;
2055 if ((seq == NULL) || (node == NULL))
2056 return ((unsigned long) -1);
2058 /* Do a binary search for the key */
2059 lower = 1;
2060 upper = seq->length;
2061 middle = 0;
2062 while (lower <= upper && !found) {
2063 middle = lower + (upper - lower) / 2;
2064 if (node == seq->buffer[middle - 1].node)
2065 found = 1;
2066 else if (node < seq->buffer[middle - 1].node)
2067 upper = middle - 1;
2068 else
2069 lower = middle + 1;
2072 /* Return position */
2073 if (middle == 0 || seq->buffer[middle - 1].node < node)
2074 return middle;
2075 else
2076 return middle - 1;
2081 * xmlParserAddNodeInfo:
2082 * @ctxt: an XML parser context
2083 * @info: a node info sequence pointer
2085 * DEPRECATED: Don't use.
2087 * Insert node info record into the sorted sequence
2089 void
2090 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2091 const xmlParserNodeInfoPtr info)
2093 unsigned long pos;
2095 if ((ctxt == NULL) || (info == NULL)) return;
2097 /* Find pos and check to see if node is already in the sequence */
2098 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2099 info->node);
2101 if ((pos < ctxt->node_seq.length) &&
2102 (ctxt->node_seq.buffer != NULL) &&
2103 (ctxt->node_seq.buffer[pos].node == info->node)) {
2104 ctxt->node_seq.buffer[pos] = *info;
2107 /* Otherwise, we need to add new node to buffer */
2108 else {
2109 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2110 (ctxt->node_seq.buffer == NULL)) {
2111 xmlParserNodeInfo *tmp_buffer;
2112 unsigned int byte_size;
2114 if (ctxt->node_seq.maximum == 0)
2115 ctxt->node_seq.maximum = 2;
2116 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2117 (2 * ctxt->node_seq.maximum));
2119 if (ctxt->node_seq.buffer == NULL)
2120 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2121 else
2122 tmp_buffer =
2123 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2124 byte_size);
2126 if (tmp_buffer == NULL) {
2127 xmlErrMemory(ctxt, "failed to allocate buffer\n");
2128 return;
2130 ctxt->node_seq.buffer = tmp_buffer;
2131 ctxt->node_seq.maximum *= 2;
2134 /* If position is not at end, move elements out of the way */
2135 if (pos != ctxt->node_seq.length) {
2136 unsigned long i;
2138 for (i = ctxt->node_seq.length; i > pos; i--)
2139 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2142 /* Copy element and increase length */
2143 ctxt->node_seq.buffer[pos] = *info;
2144 ctxt->node_seq.length++;
2148 /************************************************************************
2150 * Defaults settings *
2152 ************************************************************************/
2154 * xmlPedanticParserDefault:
2155 * @val: int 0 or 1
2157 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2159 * Set and return the previous value for enabling pedantic warnings.
2161 * Returns the last value for 0 for no substitution, 1 for substitution.
2165 xmlPedanticParserDefault(int val) {
2166 int old = xmlPedanticParserDefaultValue;
2168 xmlPedanticParserDefaultValue = val;
2169 return(old);
2173 * xmlLineNumbersDefault:
2174 * @val: int 0 or 1
2176 * DEPRECATED: The modern options API always enables line numbers.
2178 * Set and return the previous value for enabling line numbers in elements
2179 * contents. This may break on old application and is turned off by default.
2181 * Returns the last value for 0 for no substitution, 1 for substitution.
2185 xmlLineNumbersDefault(int val) {
2186 int old = xmlLineNumbersDefaultValue;
2188 xmlLineNumbersDefaultValue = val;
2189 return(old);
2193 * xmlSubstituteEntitiesDefault:
2194 * @val: int 0 or 1
2196 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2198 * Set and return the previous value for default entity support.
2199 * Initially the parser always keep entity references instead of substituting
2200 * entity values in the output. This function has to be used to change the
2201 * default parser behavior
2202 * SAX::substituteEntities() has to be used for changing that on a file by
2203 * file basis.
2205 * Returns the last value for 0 for no substitution, 1 for substitution.
2209 xmlSubstituteEntitiesDefault(int val) {
2210 int old = xmlSubstituteEntitiesDefaultValue;
2212 xmlSubstituteEntitiesDefaultValue = val;
2213 return(old);
2217 * xmlKeepBlanksDefault:
2218 * @val: int 0 or 1
2220 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2222 * Set and return the previous value for default blanks text nodes support.
2223 * The 1.x version of the parser used an heuristic to try to detect
2224 * ignorable white spaces. As a result the SAX callback was generating
2225 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2226 * using the DOM output text nodes containing those blanks were not generated.
2227 * The 2.x and later version will switch to the XML standard way and
2228 * ignorableWhitespace() are only generated when running the parser in
2229 * validating mode and when the current element doesn't allow CDATA or
2230 * mixed content.
2231 * This function is provided as a way to force the standard behavior
2232 * on 1.X libs and to switch back to the old mode for compatibility when
2233 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2234 * by using xmlIsBlankNode() commodity function to detect the "empty"
2235 * nodes generated.
2236 * This value also affect autogeneration of indentation when saving code
2237 * if blanks sections are kept, indentation is not generated.
2239 * Returns the last value for 0 for no substitution, 1 for substitution.
2243 xmlKeepBlanksDefault(int val) {
2244 int old = xmlKeepBlanksDefaultValue;
2246 xmlKeepBlanksDefaultValue = val;
2247 if (!val) xmlIndentTreeOutput = 1;
2248 return(old);