ws2_32/tests: Execute test_iocp() near the end.
[wine.git] / libs / xml2 / parserInternals.c
blobb92f6426553aff1555e481a5229666a50fc056d2
1 /*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
5 * See Copyright for the status of this software.
7 * daniel@veillard.com
8 */
10 #define IN_LIBXML
11 #include "libxml.h"
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
19 #include <string.h>
20 #include <ctype.h>
21 #include <stdlib.h>
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/valid.h>
28 #include <libxml/entities.h>
29 #include <libxml/xmlerror.h>
30 #include <libxml/encoding.h>
31 #include <libxml/valid.h>
32 #include <libxml/xmlIO.h>
33 #include <libxml/uri.h>
34 #include <libxml/dict.h>
35 #include <libxml/SAX.h>
36 #ifdef LIBXML_CATALOG_ENABLED
37 #include <libxml/catalog.h>
38 #endif
39 #include <libxml/globals.h>
40 #include <libxml/chvalid.h>
/*
 * Accessors for the current input window of a parser context.
 * The macro argument is parenthesized so that any pointer-valued
 * expression (for instance "p + 1" or "*pp") can be passed safely.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* True while the read cursor has not moved past the end of the input. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
46 #include "private/buf.h"
47 #include "private/enc.h"
48 #include "private/error.h"
49 #include "private/io.h"
50 #include "private/parser.h"
53 * Various global defaults for parsing
56 /**
57 * xmlCheckVersion:
58 * @version: the include version number
60 * check the compiled lib version against the include one.
61 * This can warn or immediately kill the application
63 void
64 xmlCheckVersion(int version) {
65 int myversion = LIBXML_VERSION;
67 xmlInitParser();
69 if ((myversion / 10000) != (version / 10000)) {
70 xmlGenericError(xmlGenericErrorContext,
71 "Fatal: program compiled against libxml %d using libxml %d\n",
72 (version / 10000), (myversion / 10000));
73 fprintf(stderr,
74 "Fatal: program compiled against libxml %d using libxml %d\n",
75 (version / 10000), (myversion / 10000));
77 if ((myversion / 100) < (version / 100)) {
78 xmlGenericError(xmlGenericErrorContext,
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
85 /************************************************************************
86 * *
87 * Some factorized error routines *
88 * *
89 ************************************************************************/
92 /**
93 * xmlErrMemory:
94 * @ctxt: an XML parser context
95 * @extra: extra information
97 * Handle a redefinition of attribute error
99 void
100 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
102 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
103 (ctxt->instate == XML_PARSER_EOF))
104 return;
105 if (ctxt != NULL) {
106 ctxt->errNo = XML_ERR_NO_MEMORY;
107 ctxt->instate = XML_PARSER_EOF;
108 ctxt->disableSAX = 1;
110 if (extra)
111 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
112 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
113 NULL, NULL, 0, 0,
114 "Memory allocation failed : %s\n", extra);
115 else
116 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
117 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
118 NULL, NULL, 0, 0, "Memory allocation failed\n");
122 * __xmlErrEncoding:
123 * @ctxt: an XML parser context
124 * @xmlerr: the error number
125 * @msg: the error message
126 * @str1: an string info
127 * @str2: an string info
129 * Handle an encoding error
131 void
132 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
133 const char *msg, const xmlChar * str1, const xmlChar * str2)
135 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
136 (ctxt->instate == XML_PARSER_EOF))
137 return;
138 if (ctxt != NULL)
139 ctxt->errNo = xmlerr;
140 __xmlRaiseError(NULL, NULL, NULL,
141 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
142 NULL, 0, (const char *) str1, (const char *) str2,
143 NULL, 0, 0, msg, str1, str2);
144 if (ctxt != NULL) {
145 ctxt->wellFormed = 0;
146 if (ctxt->recovery == 0)
147 ctxt->disableSAX = 1;
152 * xmlErrInternal:
153 * @ctxt: an XML parser context
154 * @msg: the error message
155 * @str: error information
157 * Handle an internal error
159 static void LIBXML_ATTR_FORMAT(2,0)
160 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
162 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
163 (ctxt->instate == XML_PARSER_EOF))
164 return;
165 if (ctxt != NULL)
166 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
167 __xmlRaiseError(NULL, NULL, NULL,
168 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
169 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
170 0, 0, msg, str);
171 if (ctxt != NULL) {
172 ctxt->wellFormed = 0;
173 if (ctxt->recovery == 0)
174 ctxt->disableSAX = 1;
179 * xmlErrEncodingInt:
180 * @ctxt: an XML parser context
181 * @error: the error number
182 * @msg: the error message
183 * @val: an integer value
185 * n encoding error
187 static void LIBXML_ATTR_FORMAT(3,0)
188 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
189 const char *msg, int val)
191 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
192 (ctxt->instate == XML_PARSER_EOF))
193 return;
194 if (ctxt != NULL)
195 ctxt->errNo = error;
196 __xmlRaiseError(NULL, NULL, NULL,
197 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
198 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
199 if (ctxt != NULL) {
200 ctxt->wellFormed = 0;
201 if (ctxt->recovery == 0)
202 ctxt->disableSAX = 1;
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
220 /************************************************************************
222 * Input handling functions for progressive parsing *
224 ************************************************************************/
226 /* #define DEBUG_INPUT */
227 /* #define DEBUG_STACK */
228 /* #define DEBUG_PUSH */
231 /* we need to keep enough input to show errors in context */
232 #define LINE_LEN 80
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper (only built with DEBUG_INPUT): report through the
 * generic error handler when the input's base/cur pointers disagree
 * with its underlying buffer, then dump the buffer state.
 *
 * The dump prints pointers with %p and casts the offset and the used
 * size to int for %d, so every argument matches its conversion.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d\n",
            (void *) in, (const void *) xmlBufContent(in->buf->buffer),
            (int) (in->cur - in->base), (int) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
262 * xmlHaltParser:
263 * @ctxt: an XML parser context
265 * Blocks further parser processing don't override error
266 * for internal use
268 void
269 xmlHaltParser(xmlParserCtxtPtr ctxt) {
270 if (ctxt == NULL)
271 return;
272 ctxt->instate = XML_PARSER_EOF;
273 ctxt->disableSAX = 1;
274 while (ctxt->inputNr > 1)
275 xmlFreeInputStream(inputPop(ctxt));
276 if (ctxt->input != NULL) {
278 * in case there was a specific allocation deallocate before
279 * overriding base
281 if (ctxt->input->free != NULL) {
282 ctxt->input->free((xmlChar *) ctxt->input->base);
283 ctxt->input->free = NULL;
285 if (ctxt->input->buf != NULL) {
286 xmlFreeParserInputBuffer(ctxt->input->buf);
287 ctxt->input->buf = NULL;
289 ctxt->input->cur = BAD_CAST"";
290 ctxt->input->length = 0;
291 ctxt->input->base = ctxt->input->cur;
292 ctxt->input->end = ctxt->input->cur;
297 * xmlParserInputRead:
298 * @in: an XML parser input
299 * @len: an indicative size for the lookahead
301 * DEPRECATED: This function was internal and is deprecated.
303 * Returns -1 as this is an error to use it.
306 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
307 return(-1);
311 * xmlParserGrow:
312 * @ctxt: an XML parser context
315 xmlParserGrow(xmlParserCtxtPtr ctxt) {
316 xmlParserInputPtr in = ctxt->input;
317 xmlParserInputBufferPtr buf = in->buf;
318 ptrdiff_t curEnd = in->end - in->cur;
319 ptrdiff_t curBase = in->cur - in->base;
320 int ret;
322 if (buf == NULL)
323 return(0);
324 /* Don't grow push parser buffer. */
325 if (ctxt->progressive)
326 return(0);
327 /* Don't grow memory buffers. */
328 if ((buf->encoder == NULL) && (buf->readcallback == NULL))
329 return(0);
331 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
332 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
333 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
334 xmlErrInternal(ctxt, "Huge input lookup", NULL);
335 xmlHaltParser(ctxt);
336 return(-1);
339 if (curEnd >= INPUT_CHUNK)
340 return(0);
342 ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
343 xmlBufSetInputBaseCur(buf->buffer, in, 0, curBase);
345 /* TODO: Get error code from xmlParserInputBufferGrow */
346 if (ret < 0) {
347 xmlErrInternal(ctxt, "Growing input buffer", NULL);
348 xmlHaltParser(ctxt);
351 return(ret);
355 * xmlParserInputGrow:
356 * @in: an XML parser input
357 * @len: an indicative size for the lookahead
359 * DEPRECATED: Don't use.
361 * This function increase the input for the parser. It tries to
362 * preserve pointers to the input buffer, and keep already read data
364 * Returns the amount of char read, or -1 in case of error, 0 indicate the
365 * end of this entity
368 xmlParserInputGrow(xmlParserInputPtr in, int len) {
369 int ret;
370 size_t indx;
372 if ((in == NULL) || (len < 0)) return(-1);
373 #ifdef DEBUG_INPUT
374 xmlGenericError(xmlGenericErrorContext, "Grow\n");
375 #endif
376 if (in->buf == NULL) return(-1);
377 if (in->base == NULL) return(-1);
378 if (in->cur == NULL) return(-1);
379 if (in->buf->buffer == NULL) return(-1);
381 /* Don't grow memory buffers. */
382 if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
383 return(0);
385 CHECK_BUFFER(in);
387 indx = in->cur - in->base;
388 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
390 CHECK_BUFFER(in);
392 return(0);
394 ret = xmlParserInputBufferGrow(in->buf, len);
396 in->base = xmlBufContent(in->buf->buffer);
397 if (in->base == NULL) {
398 in->base = BAD_CAST "";
399 in->cur = in->base;
400 in->end = in->base;
401 return(-1);
403 in->cur = in->base + indx;
404 in->end = xmlBufEnd(in->buf->buffer);
406 CHECK_BUFFER(in);
408 return(ret);
412 * xmlParserShrink:
413 * @ctxt: an XML parser context
415 void
416 xmlParserShrink(xmlParserCtxtPtr ctxt) {
417 xmlParserInputPtr in = ctxt->input;
418 xmlParserInputBufferPtr buf = in->buf;
419 size_t used;
421 /* Don't shrink pull parser memory buffers. */
422 if ((buf == NULL) ||
423 ((ctxt->progressive == 0) &&
424 (buf->encoder == NULL) && (buf->readcallback == NULL)))
425 return;
427 used = in->cur - in->base;
429 * Do not shrink on large buffers whose only a tiny fraction
430 * was consumed
432 if (used > INPUT_CHUNK) {
433 size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
435 if (res > 0) {
436 used -= res;
437 if ((res > ULONG_MAX) ||
438 (in->consumed > ULONG_MAX - (unsigned long)res))
439 in->consumed = ULONG_MAX;
440 else
441 in->consumed += res;
445 xmlBufSetInputBaseCur(buf->buffer, in, 0, used);
449 * xmlParserInputShrink:
450 * @in: an XML parser input
452 * DEPRECATED: Don't use.
454 * This function removes used input for the parser.
456 void
457 xmlParserInputShrink(xmlParserInputPtr in) {
458 size_t used;
459 size_t ret;
461 #ifdef DEBUG_INPUT
462 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
463 #endif
464 if (in == NULL) return;
465 if (in->buf == NULL) return;
466 if (in->base == NULL) return;
467 if (in->cur == NULL) return;
468 if (in->buf->buffer == NULL) return;
470 CHECK_BUFFER(in);
472 used = in->cur - in->base;
474 * Do not shrink on large buffers whose only a tiny fraction
475 * was consumed
477 if (used > INPUT_CHUNK) {
478 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
479 if (ret > 0) {
480 used -= ret;
481 if ((ret > ULONG_MAX) ||
482 (in->consumed > ULONG_MAX - (unsigned long)ret))
483 in->consumed = ULONG_MAX;
484 else
485 in->consumed += ret;
489 if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
490 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
493 in->base = xmlBufContent(in->buf->buffer);
494 if (in->base == NULL) {
495 /* TODO: raise error */
496 in->base = BAD_CAST "";
497 in->cur = in->base;
498 in->end = in->base;
499 return;
501 in->cur = in->base + used;
502 in->end = xmlBufEnd(in->buf->buffer);
504 CHECK_BUFFER(in);
507 /************************************************************************
509 * UTF8 character input and related functions *
511 ************************************************************************/
514 * xmlNextChar:
515 * @ctxt: the XML parser context
517 * DEPRECATED: Internal function, do not use.
519 * Skip to the next char input char.
522 void
523 xmlNextChar(xmlParserCtxtPtr ctxt)
525 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
526 (ctxt->input == NULL))
527 return;
529 if (!(VALID_CTXT(ctxt))) {
530 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
531 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
532 xmlStopParser(ctxt);
533 return;
536 if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
537 if (xmlParserGrow(ctxt) < 0)
538 return;
539 if (ctxt->input->cur >= ctxt->input->end)
540 return;
543 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
544 const unsigned char *cur;
545 unsigned char c;
548 * 2.11 End-of-Line Handling
549 * the literal two-character sequence "#xD#xA" or a standalone
550 * literal #xD, an XML processor must pass to the application
551 * the single character #xA.
553 if (*(ctxt->input->cur) == '\n') {
554 ctxt->input->line++; ctxt->input->col = 1;
555 } else
556 ctxt->input->col++;
559 * We are supposed to handle UTF8, check it's valid
560 * From rfc2044: encoding of the Unicode values on UTF-8:
562 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
563 * 0000 0000-0000 007F 0xxxxxxx
564 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
565 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
567 * Check for the 0x110000 limit too
569 cur = ctxt->input->cur;
571 c = *cur;
572 if (c & 0x80) {
573 size_t avail;
575 if (c == 0xC0)
576 goto encoding_error;
578 avail = ctxt->input->end - ctxt->input->cur;
580 if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
581 goto encoding_error;
582 if ((c & 0xe0) == 0xe0) {
583 unsigned int val;
585 if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
586 goto encoding_error;
587 if ((c & 0xf0) == 0xf0) {
588 if (((c & 0xf8) != 0xf0) ||
589 (avail < 4) || ((cur[3] & 0xc0) != 0x80))
590 goto encoding_error;
591 /* 4-byte code */
592 ctxt->input->cur += 4;
593 val = (cur[0] & 0x7) << 18;
594 val |= (cur[1] & 0x3f) << 12;
595 val |= (cur[2] & 0x3f) << 6;
596 val |= cur[3] & 0x3f;
597 } else {
598 /* 3-byte code */
599 ctxt->input->cur += 3;
600 val = (cur[0] & 0xf) << 12;
601 val |= (cur[1] & 0x3f) << 6;
602 val |= cur[2] & 0x3f;
604 if (((val > 0xd7ff) && (val < 0xe000)) ||
605 ((val > 0xfffd) && (val < 0x10000)) ||
606 (val >= 0x110000)) {
607 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
608 "Char 0x%X out of allowed range\n",
609 val);
611 } else
612 /* 2-byte code */
613 ctxt->input->cur += 2;
614 } else
615 /* 1-byte code */
616 ctxt->input->cur++;
617 } else {
619 * Assume it's a fixed length encoding (1) with
620 * a compatible encoding for the ASCII set, since
621 * XML constructs only use < 128 chars
624 if (*(ctxt->input->cur) == '\n') {
625 ctxt->input->line++; ctxt->input->col = 1;
626 } else
627 ctxt->input->col++;
628 ctxt->input->cur++;
630 return;
631 encoding_error:
633 * If we detect an UTF8 error that probably mean that the
634 * input encoding didn't get properly advertised in the
635 * declaration header. Report the error and switch the encoding
636 * to ISO-Latin-1 (if you don't like this policy, just declare the
637 * encoding !)
639 if ((ctxt == NULL) || (ctxt->input == NULL) ||
640 (ctxt->input->end - ctxt->input->cur < 4)) {
641 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
642 "Input is not proper UTF-8, indicate encoding !\n",
643 NULL, NULL);
644 } else {
645 char buffer[150];
647 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
648 ctxt->input->cur[0], ctxt->input->cur[1],
649 ctxt->input->cur[2], ctxt->input->cur[3]);
650 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
651 "Input is not proper UTF-8, indicate encoding !\n%s",
652 BAD_CAST buffer, NULL);
654 ctxt->charset = XML_CHAR_ENCODING_8859_1;
655 ctxt->input->cur++;
656 return;
660 * xmlCurrentChar:
661 * @ctxt: the XML parser context
662 * @len: pointer to the length of the char read
664 * DEPRECATED: Internal function, do not use.
666 * The current char value, if using UTF-8 this may actually span multiple
667 * bytes in the input buffer. Implement the end of line normalization:
668 * 2.11 End-of-Line Handling
669 * Wherever an external parsed entity or the literal entity value
670 * of an internal parsed entity contains either the literal two-character
671 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
672 * must pass to the application the single character #xA.
673 * This behavior can conveniently be produced by normalizing all
674 * line breaks to #xA on input, before parsing.)
676 * Returns the current char value and its length
680 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
681 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
682 if (ctxt->instate == XML_PARSER_EOF)
683 return(0);
685 if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
686 (xmlParserGrow(ctxt) < 0))
687 return(0);
689 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
690 *len = 1;
691 return(*ctxt->input->cur);
693 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
695 * We are supposed to handle UTF8, check it's valid
696 * From rfc2044: encoding of the Unicode values on UTF-8:
698 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
699 * 0000 0000-0000 007F 0xxxxxxx
700 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
701 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
703 * Check for the 0x110000 limit too
705 const unsigned char *cur = ctxt->input->cur;
706 unsigned char c;
707 unsigned int val;
709 c = *cur;
710 if (c & 0x80) {
711 size_t avail;
713 if (((c & 0x40) == 0) || (c == 0xC0))
714 goto encoding_error;
716 avail = ctxt->input->end - ctxt->input->cur;
718 if (avail < 2)
719 goto incomplete_sequence;
720 if ((cur[1] & 0xc0) != 0x80)
721 goto encoding_error;
722 if ((c & 0xe0) == 0xe0) {
723 if (avail < 3)
724 goto incomplete_sequence;
725 if ((cur[2] & 0xc0) != 0x80)
726 goto encoding_error;
727 if ((c & 0xf0) == 0xf0) {
728 if (avail < 4)
729 goto incomplete_sequence;
730 if (((c & 0xf8) != 0xf0) ||
731 ((cur[3] & 0xc0) != 0x80))
732 goto encoding_error;
733 /* 4-byte code */
734 *len = 4;
735 val = (cur[0] & 0x7) << 18;
736 val |= (cur[1] & 0x3f) << 12;
737 val |= (cur[2] & 0x3f) << 6;
738 val |= cur[3] & 0x3f;
739 if (val < 0x10000)
740 goto encoding_error;
741 } else {
742 /* 3-byte code */
743 *len = 3;
744 val = (cur[0] & 0xf) << 12;
745 val |= (cur[1] & 0x3f) << 6;
746 val |= cur[2] & 0x3f;
747 if (val < 0x800)
748 goto encoding_error;
750 } else {
751 /* 2-byte code */
752 *len = 2;
753 val = (cur[0] & 0x1f) << 6;
754 val |= cur[1] & 0x3f;
755 if (val < 0x80)
756 goto encoding_error;
758 if (!IS_CHAR(val)) {
759 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
760 "Char 0x%X out of allowed range\n", val);
762 return(val);
763 } else {
764 /* 1-byte code */
765 *len = 1;
766 if ((*ctxt->input->cur == 0) &&
767 (ctxt->input->end > ctxt->input->cur)) {
768 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
769 "Char 0x0 out of allowed range\n", 0);
771 if (*ctxt->input->cur == 0xD) {
772 if (ctxt->input->cur[1] == 0xA) {
773 ctxt->input->cur++;
775 return(0xA);
777 return(*ctxt->input->cur);
781 * Assume it's a fixed length encoding (1) with
782 * a compatible encoding for the ASCII set, since
783 * XML constructs only use < 128 chars
785 *len = 1;
786 if (*ctxt->input->cur == 0xD) {
787 if (ctxt->input->cur[1] == 0xA) {
788 ctxt->input->cur++;
790 return(0xA);
792 return(*ctxt->input->cur);
794 encoding_error:
796 * If we detect an UTF8 error that probably mean that the
797 * input encoding didn't get properly advertised in the
798 * declaration header. Report the error and switch the encoding
799 * to ISO-Latin-1 (if you don't like this policy, just declare the
800 * encoding !)
802 if (ctxt->input->end - ctxt->input->cur < 4) {
803 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
804 "Input is not proper UTF-8, indicate encoding !\n",
805 NULL, NULL);
806 } else {
807 char buffer[150];
809 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
810 ctxt->input->cur[0], ctxt->input->cur[1],
811 ctxt->input->cur[2], ctxt->input->cur[3]);
812 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
813 "Input is not proper UTF-8, indicate encoding !\n%s",
814 BAD_CAST buffer, NULL);
816 ctxt->charset = XML_CHAR_ENCODING_8859_1;
817 *len = 1;
818 return(*ctxt->input->cur);
820 incomplete_sequence:
822 * An encoding problem may arise from a truncated input buffer
823 * splitting a character in the middle. In that case do not raise
824 * an error but return 0. This should only happen when push parsing
825 * char data.
827 *len = 0;
828 return(0);
832 * xmlStringCurrentChar:
833 * @ctxt: the XML parser context
834 * @cur: pointer to the beginning of the char
835 * @len: pointer to the length of the char read
837 * DEPRECATED: Internal function, do not use.
839 * The current char value, if using UTF-8 this may actually span multiple
840 * bytes in the input buffer.
842 * Returns the current char value and its length
846 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
848 if ((len == NULL) || (cur == NULL)) return(0);
849 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
851 * We are supposed to handle UTF8, check it's valid
852 * From rfc2044: encoding of the Unicode values on UTF-8:
854 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
855 * 0000 0000-0000 007F 0xxxxxxx
856 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
857 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
859 * Check for the 0x110000 limit too
861 unsigned char c;
862 unsigned int val;
864 c = *cur;
865 if (c & 0x80) {
866 if ((cur[1] & 0xc0) != 0x80)
867 goto encoding_error;
868 if ((c & 0xe0) == 0xe0) {
870 if ((cur[2] & 0xc0) != 0x80)
871 goto encoding_error;
872 if ((c & 0xf0) == 0xf0) {
873 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
874 goto encoding_error;
875 /* 4-byte code */
876 *len = 4;
877 val = (cur[0] & 0x7) << 18;
878 val |= (cur[1] & 0x3f) << 12;
879 val |= (cur[2] & 0x3f) << 6;
880 val |= cur[3] & 0x3f;
881 } else {
882 /* 3-byte code */
883 *len = 3;
884 val = (cur[0] & 0xf) << 12;
885 val |= (cur[1] & 0x3f) << 6;
886 val |= cur[2] & 0x3f;
888 } else {
889 /* 2-byte code */
890 *len = 2;
891 val = (cur[0] & 0x1f) << 6;
892 val |= cur[1] & 0x3f;
894 if (!IS_CHAR(val)) {
895 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
896 "Char 0x%X out of allowed range\n", val);
898 return (val);
899 } else {
900 /* 1-byte code */
901 *len = 1;
902 return (*cur);
906 * Assume it's a fixed length encoding (1) with
907 * a compatible encoding for the ASCII set, since
908 * XML constructs only use < 128 chars
910 *len = 1;
911 return (*cur);
912 encoding_error:
915 * An encoding problem may arise from a truncated input buffer
916 * splitting a character in the middle. In that case do not raise
917 * an error but return 0 to indicate an end of stream problem
919 if ((ctxt == NULL) || (ctxt->input == NULL) ||
920 (ctxt->input->end - ctxt->input->cur < 4)) {
921 *len = 0;
922 return(0);
925 * If we detect an UTF8 error that probably mean that the
926 * input encoding didn't get properly advertised in the
927 * declaration header. Report the error and switch the encoding
928 * to ISO-Latin-1 (if you don't like this policy, just declare the
929 * encoding !)
932 char buffer[150];
934 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
935 ctxt->input->cur[0], ctxt->input->cur[1],
936 ctxt->input->cur[2], ctxt->input->cur[3]);
937 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
938 "Input is not proper UTF-8, indicate encoding !\n%s",
939 BAD_CAST buffer, NULL);
941 *len = 1;
942 return (*cur);
946 * xmlCopyCharMultiByte:
947 * @out: pointer to an array of xmlChar
948 * @val: the char value
950 * append the char value in the array
952 * Returns the number of xmlChar written
955 xmlCopyCharMultiByte(xmlChar *out, int val) {
956 if ((out == NULL) || (val < 0)) return(0);
958 * We are supposed to handle UTF8, check it's valid
959 * From rfc2044: encoding of the Unicode values on UTF-8:
961 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
962 * 0000 0000-0000 007F 0xxxxxxx
963 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
964 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
966 if (val >= 0x80) {
967 xmlChar *savedout = out;
968 int bits;
969 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
970 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
971 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
972 else {
973 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
974 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
975 val);
976 return(0);
978 for ( ; bits >= 0; bits-= 6)
979 *out++= ((val >> bits) & 0x3F) | 0x80 ;
980 return (out - savedout);
982 *out = val;
983 return 1;
987 * xmlCopyChar:
988 * @len: Ignored, compatibility
989 * @out: pointer to an array of xmlChar
990 * @val: the char value
992 * append the char value in the array
994 * Returns the number of xmlChar written
998 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
999 if ((out == NULL) || (val < 0)) return(0);
1000 /* the len parameter is ignored */
1001 if (val >= 0x80) {
1002 return(xmlCopyCharMultiByte (out, val));
1004 *out = val;
1005 return 1;
1008 /************************************************************************
1010 * Commodity functions to switch encodings *
1012 ************************************************************************/
1014 static xmlCharEncodingHandlerPtr
1015 xmlDetectEBCDIC(xmlParserInputPtr input) {
1016 xmlChar out[200];
1017 xmlCharEncodingHandlerPtr handler;
1018 int inlen, outlen, res, i;
1021 * To detect the EBCDIC code page, we convert the first 200 bytes
1022 * to EBCDIC-US and try to find the encoding declaration.
1024 handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1025 if (handler == NULL)
1026 return(NULL);
1027 outlen = sizeof(out) - 1;
1028 inlen = input->end - input->cur;
1029 res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen, 0);
1030 if (res < 0)
1031 return(handler);
1032 out[outlen] = 0;
1034 for (i = 0; i < outlen; i++) {
1035 if (out[i] == '>')
1036 break;
1037 if ((out[i] == 'e') &&
1038 (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1039 int start, cur, quote;
1041 i += 8;
1042 while (IS_BLANK_CH(out[i]))
1043 i += 1;
1044 if (out[i++] != '=')
1045 break;
1046 while (IS_BLANK_CH(out[i]))
1047 i += 1;
1048 quote = out[i++];
1049 if ((quote != '\'') && (quote != '"'))
1050 break;
1051 start = i;
1052 cur = out[i];
1053 while (((cur >= 'a') && (cur <= 'z')) ||
1054 ((cur >= 'A') && (cur <= 'Z')) ||
1055 ((cur >= '0') && (cur <= '9')) ||
1056 (cur == '.') || (cur == '_') ||
1057 (cur == '-'))
1058 cur = out[++i];
1059 if (cur != quote)
1060 break;
1061 out[i] = 0;
1062 xmlCharEncCloseFunc(handler);
1063 handler = xmlFindCharEncodingHandler((char *) out + start);
1064 break;
1068 return(handler);
1072 * xmlSwitchEncoding:
1073 * @ctxt: the parser context
1074 * @enc: the encoding value (number)
1076 * change the input functions when discovering the character encoding
1077 * of a given entity.
1079 * Returns 0 in case of success, -1 otherwise
1082 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1084 xmlCharEncodingHandlerPtr handler;
1085 int ret;
1087 if (ctxt == NULL) return(-1);
1088 switch (enc) {
1089 case XML_CHAR_ENCODING_ERROR:
1090 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
1091 "encoding unknown\n", NULL, NULL);
1092 return(-1);
1093 case XML_CHAR_ENCODING_NONE:
1094 /* let's assume it's UTF-8 without the XML decl */
1095 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1096 return(0);
1097 case XML_CHAR_ENCODING_UTF8:
1098 /* default encoding, no conversion should be needed */
1099 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1102 * Errata on XML-1.0 June 20 2001
1103 * Specific handling of the Byte Order Mark for
1104 * UTF-8
1106 if ((ctxt->input != NULL) &&
1107 (ctxt->input->cur[0] == 0xEF) &&
1108 (ctxt->input->cur[1] == 0xBB) &&
1109 (ctxt->input->cur[2] == 0xBF)) {
1110 ctxt->input->cur += 3;
1112 return(0);
1113 case XML_CHAR_ENCODING_EBCDIC:
1114 handler = xmlDetectEBCDIC(ctxt->input);
1115 break;
1116 default:
1117 handler = xmlGetCharEncodingHandler(enc);
1118 break;
1120 if (handler == NULL) {
1122 * Default handlers.
1124 switch (enc) {
1125 case XML_CHAR_ENCODING_ASCII:
1126 /* default encoding, no conversion should be needed */
1127 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1128 return(0);
1129 case XML_CHAR_ENCODING_8859_1:
1130 if ((ctxt->inputNr == 1) &&
1131 (ctxt->encoding == NULL) &&
1132 (ctxt->input != NULL) &&
1133 (ctxt->input->encoding != NULL)) {
1134 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1136 ctxt->charset = enc;
1137 return(0);
1138 default:
1139 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1140 "encoding not supported: %s\n",
1141 BAD_CAST xmlGetCharEncodingName(enc), NULL);
1143 * TODO: We could recover from errors in external entities
1144 * if we didn't stop the parser. But most callers of this
1145 * function don't check the return value.
1147 xmlStopParser(ctxt);
1148 return(-1);
1151 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1152 if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1154 * on encoding conversion errors, stop the parser
1156 xmlStopParser(ctxt);
1157 ctxt->errNo = XML_I18N_CONV_FAILED;
1159 return(ret);
1163 * xmlSwitchInputEncoding:
1164 * @ctxt: the parser context
1165 * @input: the input stream
1166 * @handler: the encoding handler
1168 * change the input functions when discovering the character encoding
1169 * of a given entity.
1171 * Returns 0 in case of success, -1 otherwise
1174 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1175 xmlCharEncodingHandlerPtr handler)
1177 int nbchars;
1178 xmlParserInputBufferPtr in;
1180 if (handler == NULL)
1181 return (-1);
1182 if (input == NULL)
1183 return (-1);
1184 in = input->buf;
1185 if (in == NULL) {
1186 xmlErrInternal(ctxt,
1187 "static memory buffer doesn't support encoding\n", NULL);
1189 * Callers assume that the input buffer takes ownership of the
1190 * encoding handler. xmlCharEncCloseFunc frees unregistered
1191 * handlers and avoids a memory leak.
1193 xmlCharEncCloseFunc(handler);
1194 return (-1);
1197 if (in->encoder != NULL) {
1198 if (in->encoder == handler)
1199 return (0);
1202 * Switching encodings during parsing is a really bad idea,
1203 * but WebKit/Chromium switches from ISO-8859-1 to UTF-16 as soon as
1204 * it finds Unicode characters with code points larger than 255.
1206 * TODO: We should check whether the "raw" input buffer is empty and
1207 * convert the old content using the old encoder.
1210 xmlCharEncCloseFunc(in->encoder);
1211 in->encoder = handler;
1212 return (0);
1215 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1216 in->encoder = handler;
1219 * Is there already some content down the pipe to convert ?
1221 if (xmlBufIsEmpty(in->buffer) == 0) {
1222 size_t processed, use, consumed;
1225 * Specific handling of the Byte Order Mark for
1226 * UTF-16
1228 if ((handler->name != NULL) &&
1229 (!strcmp(handler->name, "UTF-16LE") ||
1230 !strcmp(handler->name, "UTF-16")) &&
1231 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1232 input->cur += 2;
1234 if ((handler->name != NULL) &&
1235 (!strcmp(handler->name, "UTF-16BE")) &&
1236 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1237 input->cur += 2;
1240 * Errata on XML-1.0 June 20 2001
1241 * Specific handling of the Byte Order Mark for
1242 * UTF-8
1244 if ((handler->name != NULL) &&
1245 (!strcmp(handler->name, "UTF-8")) &&
1246 (input->cur[0] == 0xEF) &&
1247 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1248 input->cur += 3;
1252 * Shrink the current input buffer.
1253 * Move it as the raw buffer and create a new input buffer
1255 processed = input->cur - input->base;
1256 xmlBufShrink(in->buffer, processed);
1257 input->consumed += processed;
1258 in->raw = in->buffer;
1259 in->buffer = xmlBufCreate();
1260 in->rawconsumed = processed;
1261 use = xmlBufUse(in->raw);
1264 * TODO: We must flush and decode the whole buffer to make functions
1265 * like xmlReadMemory work with a user-provided encoding. If the
1266 * encoding is specified directly, we should probably set
1267 * XML_PARSE_IGNORE_ENC in xmlDoRead to avoid switching encodings
1268 * twice. Then we could set "flush" to false which should save
1269 * a considerable amount of memory when parsing from memory.
1270 * It's probably even possible to remove this whole if-block
1271 * completely.
1273 nbchars = xmlCharEncInput(in, 1);
1274 xmlBufResetInput(in->buffer, input);
1275 if (nbchars < 0) {
1276 /* TODO: This could be an out of memory or an encoding error. */
1277 xmlErrInternal(ctxt,
1278 "switching encoding: encoder error\n",
1279 NULL);
1280 xmlHaltParser(ctxt);
1281 return (-1);
1283 consumed = use - xmlBufUse(in->raw);
1284 if ((consumed > ULONG_MAX) ||
1285 (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))
1286 in->rawconsumed = ULONG_MAX;
1287 else
1288 in->rawconsumed += consumed;
1290 return (0);
1294 * xmlSwitchToEncoding:
1295 * @ctxt: the parser context
1296 * @handler: the encoding handler
1298 * change the input functions when discovering the character encoding
1299 * of a given entity.
1301 * Returns 0 in case of success, -1 otherwise
1304 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1306 if (ctxt == NULL)
1307 return(-1);
1308 return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1311 /************************************************************************
1313 * Commodity functions to handle entities processing *
1315 ************************************************************************/
1318 * xmlFreeInputStream:
1319 * @input: an xmlParserInputPtr
1321 * Free up an input stream.
1323 void
1324 xmlFreeInputStream(xmlParserInputPtr input) {
1325 if (input == NULL) return;
1327 if (input->filename != NULL) xmlFree((char *) input->filename);
1328 if (input->directory != NULL) xmlFree((char *) input->directory);
1329 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1330 if (input->version != NULL) xmlFree((char *) input->version);
1331 if ((input->free != NULL) && (input->base != NULL))
1332 input->free((xmlChar *) input->base);
1333 if (input->buf != NULL)
1334 xmlFreeParserInputBuffer(input->buf);
1335 xmlFree(input);
1339 * xmlNewInputStream:
1340 * @ctxt: an XML parser context
1342 * Create a new input stream structure.
1344 * Returns the new input stream or NULL
1346 xmlParserInputPtr
1347 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1348 xmlParserInputPtr input;
1350 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1351 if (input == NULL) {
1352 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1353 return(NULL);
1355 memset(input, 0, sizeof(xmlParserInput));
1356 input->line = 1;
1357 input->col = 1;
1358 input->standalone = -1;
1361 * If the context is NULL the id cannot be initialized, but that
1362 * should not happen while parsing which is the situation where
1363 * the id is actually needed.
1365 if (ctxt != NULL) {
1366 if (input->id >= INT_MAX) {
1367 xmlErrMemory(ctxt, "Input ID overflow\n");
1368 return(NULL);
1370 input->id = ctxt->input_id++;
1373 return(input);
1377 * xmlNewIOInputStream:
1378 * @ctxt: an XML parser context
1379 * @input: an I/O Input
1380 * @enc: the charset encoding if known
1382 * Create a new input stream structure encapsulating the @input into
1383 * a stream suitable for the parser.
1385 * Returns the new input stream or NULL
1387 xmlParserInputPtr
1388 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1389 xmlCharEncoding enc) {
1390 xmlParserInputPtr inputStream;
1392 if (input == NULL) return(NULL);
1393 if (xmlParserDebugEntities)
1394 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1395 inputStream = xmlNewInputStream(ctxt);
1396 if (inputStream == NULL) {
1397 return(NULL);
1399 inputStream->filename = NULL;
1400 inputStream->buf = input;
1401 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1403 if (enc != XML_CHAR_ENCODING_NONE) {
1404 xmlSwitchEncoding(ctxt, enc);
1407 return(inputStream);
1411 * xmlNewEntityInputStream:
1412 * @ctxt: an XML parser context
1413 * @entity: an Entity pointer
1415 * DEPRECATED: Internal function, do not use.
1417 * Create a new input stream based on an xmlEntityPtr
1419 * Returns the new input stream or NULL
1421 xmlParserInputPtr
1422 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1423 xmlParserInputPtr input;
1425 if (entity == NULL) {
1426 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1427 NULL);
1428 return(NULL);
1430 if (xmlParserDebugEntities)
1431 xmlGenericError(xmlGenericErrorContext,
1432 "new input from entity: %s\n", entity->name);
1433 if (entity->content == NULL) {
1434 switch (entity->etype) {
1435 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1436 xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1437 entity->name);
1438 break;
1439 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1440 case XML_EXTERNAL_PARAMETER_ENTITY:
1441 input = xmlLoadExternalEntity((char *) entity->URI,
1442 (char *) entity->ExternalID, ctxt);
1443 if (input != NULL)
1444 input->entity = entity;
1445 return(input);
1446 case XML_INTERNAL_GENERAL_ENTITY:
1447 xmlErrInternal(ctxt,
1448 "Internal entity %s without content !\n",
1449 entity->name);
1450 break;
1451 case XML_INTERNAL_PARAMETER_ENTITY:
1452 xmlErrInternal(ctxt,
1453 "Internal parameter entity %s without content !\n",
1454 entity->name);
1455 break;
1456 case XML_INTERNAL_PREDEFINED_ENTITY:
1457 xmlErrInternal(ctxt,
1458 "Predefined entity %s without content !\n",
1459 entity->name);
1460 break;
1462 return(NULL);
1464 input = xmlNewInputStream(ctxt);
1465 if (input == NULL) {
1466 return(NULL);
1468 if (entity->URI != NULL)
1469 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1470 input->base = entity->content;
1471 if (entity->length == 0)
1472 entity->length = xmlStrlen(entity->content);
1473 input->cur = entity->content;
1474 input->length = entity->length;
1475 input->end = &entity->content[input->length];
1476 input->entity = entity;
1477 return(input);
1481 * xmlNewStringInputStream:
1482 * @ctxt: an XML parser context
1483 * @buffer: an memory buffer
1485 * Create a new input stream based on a memory buffer.
1486 * Returns the new input stream
1488 xmlParserInputPtr
1489 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1490 xmlParserInputPtr input;
1491 xmlParserInputBufferPtr buf;
1493 if (buffer == NULL) {
1494 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1495 NULL);
1496 return(NULL);
1498 if (xmlParserDebugEntities)
1499 xmlGenericError(xmlGenericErrorContext,
1500 "new fixed input: %.30s\n", buffer);
1501 buf = xmlParserInputBufferCreateMem((const char *) buffer,
1502 xmlStrlen(buffer),
1503 XML_CHAR_ENCODING_NONE);
1504 if (buf == NULL) {
1505 xmlErrMemory(ctxt, NULL);
1506 return(NULL);
1508 input = xmlNewInputStream(ctxt);
1509 if (input == NULL) {
1510 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1511 xmlFreeParserInputBuffer(buf);
1512 return(NULL);
1514 input->buf = buf;
1515 xmlBufResetInput(input->buf->buffer, input);
1516 return(input);
1520 * xmlNewInputFromFile:
1521 * @ctxt: an XML parser context
1522 * @filename: the filename to use as entity
1524 * Create a new input stream based on a file or an URL.
1526 * Returns the new input stream or NULL in case of error
1528 xmlParserInputPtr
1529 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1530 xmlParserInputBufferPtr buf;
1531 xmlParserInputPtr inputStream;
1532 char *directory = NULL;
1533 xmlChar *URI = NULL;
1535 if (xmlParserDebugEntities)
1536 xmlGenericError(xmlGenericErrorContext,
1537 "new input from file: %s\n", filename);
1538 if (ctxt == NULL) return(NULL);
1539 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1540 if (buf == NULL) {
1541 if (filename == NULL)
1542 __xmlLoaderErr(ctxt,
1543 "failed to load external entity: NULL filename \n",
1544 NULL);
1545 else
1546 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1547 (const char *) filename);
1548 return(NULL);
1551 inputStream = xmlNewInputStream(ctxt);
1552 if (inputStream == NULL) {
1553 xmlFreeParserInputBuffer(buf);
1554 return(NULL);
1557 inputStream->buf = buf;
1558 inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1559 if (inputStream == NULL)
1560 return(NULL);
1562 if (inputStream->filename == NULL)
1563 URI = xmlStrdup((xmlChar *) filename);
1564 else
1565 URI = xmlStrdup((xmlChar *) inputStream->filename);
1566 directory = xmlParserGetDirectory((const char *) URI);
1567 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1568 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1569 if (URI != NULL) xmlFree((char *) URI);
1570 inputStream->directory = directory;
1572 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1573 if ((ctxt->directory == NULL) && (directory != NULL))
1574 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1575 return(inputStream);
1578 /************************************************************************
1580 * Commodity functions to handle parser contexts *
1582 ************************************************************************/
1585 * xmlInitSAXParserCtxt:
1586 * @ctxt: XML parser context
1587 * @sax: SAX handlert
1588 * @userData: user data
1590 * Initialize a SAX parser context
1592 * Returns 0 in case of success and -1 in case of error
1595 static int
1596 xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1597 void *userData)
1599 xmlParserInputPtr input;
1601 if(ctxt==NULL) {
1602 xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1603 return(-1);
1606 xmlInitParser();
1608 if (ctxt->dict == NULL)
1609 ctxt->dict = xmlDictCreate();
1610 if (ctxt->dict == NULL) {
1611 xmlErrMemory(NULL, "cannot initialize parser context\n");
1612 return(-1);
1614 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1616 if (ctxt->sax == NULL)
1617 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1618 if (ctxt->sax == NULL) {
1619 xmlErrMemory(NULL, "cannot initialize parser context\n");
1620 return(-1);
1622 if (sax == NULL) {
1623 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1624 xmlSAXVersion(ctxt->sax, 2);
1625 ctxt->userData = ctxt;
1626 } else {
1627 if (sax->initialized == XML_SAX2_MAGIC) {
1628 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1629 } else {
1630 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1631 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1633 ctxt->userData = userData ? userData : ctxt;
1636 ctxt->maxatts = 0;
1637 ctxt->atts = NULL;
1638 /* Allocate the Input stack */
1639 if (ctxt->inputTab == NULL) {
1640 ctxt->inputTab = (xmlParserInputPtr *)
1641 xmlMalloc(5 * sizeof(xmlParserInputPtr));
1642 ctxt->inputMax = 5;
1644 if (ctxt->inputTab == NULL) {
1645 xmlErrMemory(NULL, "cannot initialize parser context\n");
1646 ctxt->inputNr = 0;
1647 ctxt->inputMax = 0;
1648 ctxt->input = NULL;
1649 return(-1);
1651 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1652 xmlFreeInputStream(input);
1654 ctxt->inputNr = 0;
1655 ctxt->input = NULL;
1657 ctxt->version = NULL;
1658 ctxt->encoding = NULL;
1659 ctxt->standalone = -1;
1660 ctxt->hasExternalSubset = 0;
1661 ctxt->hasPErefs = 0;
1662 ctxt->html = 0;
1663 ctxt->external = 0;
1664 ctxt->instate = XML_PARSER_START;
1665 ctxt->token = 0;
1666 ctxt->directory = NULL;
1668 /* Allocate the Node stack */
1669 if (ctxt->nodeTab == NULL) {
1670 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1671 ctxt->nodeMax = 10;
1673 if (ctxt->nodeTab == NULL) {
1674 xmlErrMemory(NULL, "cannot initialize parser context\n");
1675 ctxt->nodeNr = 0;
1676 ctxt->nodeMax = 0;
1677 ctxt->node = NULL;
1678 ctxt->inputNr = 0;
1679 ctxt->inputMax = 0;
1680 ctxt->input = NULL;
1681 return(-1);
1683 ctxt->nodeNr = 0;
1684 ctxt->node = NULL;
1686 /* Allocate the Name stack */
1687 if (ctxt->nameTab == NULL) {
1688 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1689 ctxt->nameMax = 10;
1691 if (ctxt->nameTab == NULL) {
1692 xmlErrMemory(NULL, "cannot initialize parser context\n");
1693 ctxt->nodeNr = 0;
1694 ctxt->nodeMax = 0;
1695 ctxt->node = NULL;
1696 ctxt->inputNr = 0;
1697 ctxt->inputMax = 0;
1698 ctxt->input = NULL;
1699 ctxt->nameNr = 0;
1700 ctxt->nameMax = 0;
1701 ctxt->name = NULL;
1702 return(-1);
1704 ctxt->nameNr = 0;
1705 ctxt->name = NULL;
1707 /* Allocate the space stack */
1708 if (ctxt->spaceTab == NULL) {
1709 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1710 ctxt->spaceMax = 10;
1712 if (ctxt->spaceTab == NULL) {
1713 xmlErrMemory(NULL, "cannot initialize parser context\n");
1714 ctxt->nodeNr = 0;
1715 ctxt->nodeMax = 0;
1716 ctxt->node = NULL;
1717 ctxt->inputNr = 0;
1718 ctxt->inputMax = 0;
1719 ctxt->input = NULL;
1720 ctxt->nameNr = 0;
1721 ctxt->nameMax = 0;
1722 ctxt->name = NULL;
1723 ctxt->spaceNr = 0;
1724 ctxt->spaceMax = 0;
1725 ctxt->space = NULL;
1726 return(-1);
1728 ctxt->spaceNr = 1;
1729 ctxt->spaceMax = 10;
1730 ctxt->spaceTab[0] = -1;
1731 ctxt->space = &ctxt->spaceTab[0];
1732 ctxt->myDoc = NULL;
1733 ctxt->wellFormed = 1;
1734 ctxt->nsWellFormed = 1;
1735 ctxt->valid = 1;
1736 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1737 if (ctxt->loadsubset) {
1738 ctxt->options |= XML_PARSE_DTDLOAD;
1740 ctxt->validate = xmlDoValidityCheckingDefaultValue;
1741 ctxt->pedantic = xmlPedanticParserDefaultValue;
1742 if (ctxt->pedantic) {
1743 ctxt->options |= XML_PARSE_PEDANTIC;
1745 ctxt->linenumbers = xmlLineNumbersDefaultValue;
1746 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1747 if (ctxt->keepBlanks == 0) {
1748 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1749 ctxt->options |= XML_PARSE_NOBLANKS;
1752 ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1753 ctxt->vctxt.userData = ctxt;
1754 ctxt->vctxt.error = xmlParserValidityError;
1755 ctxt->vctxt.warning = xmlParserValidityWarning;
1756 if (ctxt->validate) {
1757 if (xmlGetWarningsDefaultValue == 0)
1758 ctxt->vctxt.warning = NULL;
1759 else
1760 ctxt->vctxt.warning = xmlParserValidityWarning;
1761 ctxt->vctxt.nodeMax = 0;
1762 ctxt->options |= XML_PARSE_DTDVALID;
1764 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1765 if (ctxt->replaceEntities) {
1766 ctxt->options |= XML_PARSE_NOENT;
1768 ctxt->record_info = 0;
1769 ctxt->checkIndex = 0;
1770 ctxt->inSubset = 0;
1771 ctxt->errNo = XML_ERR_OK;
1772 ctxt->depth = 0;
1773 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1774 ctxt->catalogs = NULL;
1775 ctxt->sizeentities = 0;
1776 ctxt->sizeentcopy = 0;
1777 ctxt->input_id = 1;
1778 xmlInitNodeInfoSeq(&ctxt->node_seq);
1779 return(0);
1783 * xmlInitParserCtxt:
1784 * @ctxt: an XML parser context
1786 * DEPRECATED: Internal function which will be made private in a future
1787 * version.
1789 * Initialize a parser context
1791 * Returns 0 in case of success and -1 in case of error
1795 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1797 return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1801 * xmlFreeParserCtxt:
1802 * @ctxt: an XML parser context
1804 * Free all the memory used by a parser context. However the parsed
1805 * document in ctxt->myDoc is not freed.
1808 void
1809 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1811 xmlParserInputPtr input;
1813 if (ctxt == NULL) return;
1815 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1816 xmlFreeInputStream(input);
1818 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1819 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1820 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1821 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1822 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1823 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1824 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1825 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1826 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1827 #ifdef LIBXML_SAX1_ENABLED
1828 if ((ctxt->sax != NULL) &&
1829 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1830 #else
1831 if (ctxt->sax != NULL)
1832 #endif /* LIBXML_SAX1_ENABLED */
1833 xmlFree(ctxt->sax);
1834 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1835 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1836 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1837 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1838 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1839 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1840 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1841 if (ctxt->attsDefault != NULL)
1842 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1843 if (ctxt->attsSpecial != NULL)
1844 xmlHashFree(ctxt->attsSpecial, NULL);
1845 if (ctxt->freeElems != NULL) {
1846 xmlNodePtr cur, next;
1848 cur = ctxt->freeElems;
1849 while (cur != NULL) {
1850 next = cur->next;
1851 xmlFree(cur);
1852 cur = next;
1855 if (ctxt->freeAttrs != NULL) {
1856 xmlAttrPtr cur, next;
1858 cur = ctxt->freeAttrs;
1859 while (cur != NULL) {
1860 next = cur->next;
1861 xmlFree(cur);
1862 cur = next;
1866 * cleanup the error strings
1868 if (ctxt->lastError.message != NULL)
1869 xmlFree(ctxt->lastError.message);
1870 if (ctxt->lastError.file != NULL)
1871 xmlFree(ctxt->lastError.file);
1872 if (ctxt->lastError.str1 != NULL)
1873 xmlFree(ctxt->lastError.str1);
1874 if (ctxt->lastError.str2 != NULL)
1875 xmlFree(ctxt->lastError.str2);
1876 if (ctxt->lastError.str3 != NULL)
1877 xmlFree(ctxt->lastError.str3);
1879 #ifdef LIBXML_CATALOG_ENABLED
1880 if (ctxt->catalogs != NULL)
1881 xmlCatalogFreeLocal(ctxt->catalogs);
1882 #endif
1883 xmlFree(ctxt);
1887 * xmlNewParserCtxt:
1889 * Allocate and initialize a new parser context.
1891 * Returns the xmlParserCtxtPtr or NULL
1894 xmlParserCtxtPtr
1895 xmlNewParserCtxt(void)
1897 return(xmlNewSAXParserCtxt(NULL, NULL));
1901 * xmlNewSAXParserCtxt:
1902 * @sax: SAX handler
1903 * @userData: user data
1905 * Allocate and initialize a new SAX parser context. If userData is NULL,
1906 * the parser context will be passed as user data.
1908 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
1911 xmlParserCtxtPtr
1912 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
1914 xmlParserCtxtPtr ctxt;
1916 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1917 if (ctxt == NULL) {
1918 xmlErrMemory(NULL, "cannot allocate parser context\n");
1919 return(NULL);
1921 memset(ctxt, 0, sizeof(xmlParserCtxt));
1922 if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
1923 xmlFreeParserCtxt(ctxt);
1924 return(NULL);
1926 return(ctxt);
1929 /************************************************************************
1931 * Handling of node information *
1933 ************************************************************************/
1936 * xmlClearParserCtxt:
1937 * @ctxt: an XML parser context
1939 * Clear (release owned resources) and reinitialize a parser context
1942 void
1943 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1945 if (ctxt==NULL)
1946 return;
1947 xmlClearNodeInfoSeq(&ctxt->node_seq);
1948 xmlCtxtReset(ctxt);
1953 * xmlParserFindNodeInfo:
1954 * @ctx: an XML parser context
1955 * @node: an XML node within the tree
1957 * DEPRECATED: Don't use.
1959 * Find the parser node info struct for a given node
1961 * Returns an xmlParserNodeInfo block pointer or NULL
1963 const xmlParserNodeInfo *
1964 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1966 unsigned long pos;
1968 if ((ctx == NULL) || (node == NULL))
1969 return (NULL);
1970 /* Find position where node should be at */
1971 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1972 if (pos < ctx->node_seq.length
1973 && ctx->node_seq.buffer[pos].node == node)
1974 return &ctx->node_seq.buffer[pos];
1975 else
1976 return NULL;
1981 * xmlInitNodeInfoSeq:
1982 * @seq: a node info sequence pointer
1984 * DEPRECATED: Don't use.
1986 * -- Initialize (set to initial state) node info sequence
1988 void
1989 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1991 if (seq == NULL)
1992 return;
1993 seq->length = 0;
1994 seq->maximum = 0;
1995 seq->buffer = NULL;
1999 * xmlClearNodeInfoSeq:
2000 * @seq: a node info sequence pointer
2002 * DEPRECATED: Don't use.
2004 * -- Clear (release memory and reinitialize) node
2005 * info sequence
2007 void
2008 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2010 if (seq == NULL)
2011 return;
2012 if (seq->buffer != NULL)
2013 xmlFree(seq->buffer);
2014 xmlInitNodeInfoSeq(seq);
2018 * xmlParserFindNodeInfoIndex:
2019 * @seq: a node info sequence pointer
2020 * @node: an XML node pointer
2022 * DEPRECATED: Don't use.
2024 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2025 * the given node is or should be at in a sorted sequence
2027 * Returns a long indicating the position of the record
2029 unsigned long
2030 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2031 const xmlNodePtr node)
2033 unsigned long upper, lower, middle;
2034 int found = 0;
2036 if ((seq == NULL) || (node == NULL))
2037 return ((unsigned long) -1);
2039 /* Do a binary search for the key */
2040 lower = 1;
2041 upper = seq->length;
2042 middle = 0;
2043 while (lower <= upper && !found) {
2044 middle = lower + (upper - lower) / 2;
2045 if (node == seq->buffer[middle - 1].node)
2046 found = 1;
2047 else if (node < seq->buffer[middle - 1].node)
2048 upper = middle - 1;
2049 else
2050 lower = middle + 1;
2053 /* Return position */
2054 if (middle == 0 || seq->buffer[middle - 1].node < node)
2055 return middle;
2056 else
2057 return middle - 1;
2062 * xmlParserAddNodeInfo:
2063 * @ctxt: an XML parser context
2064 * @info: a node info sequence pointer
2066 * DEPRECATED: Don't use.
2068 * Insert node info record into the sorted sequence
2070 void
2071 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2072 const xmlParserNodeInfoPtr info)
2074 unsigned long pos;
2076 if ((ctxt == NULL) || (info == NULL)) return;
2078 /* Find pos and check to see if node is already in the sequence */
2079 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2080 info->node);
2082 if ((pos < ctxt->node_seq.length) &&
2083 (ctxt->node_seq.buffer != NULL) &&
2084 (ctxt->node_seq.buffer[pos].node == info->node)) {
2085 ctxt->node_seq.buffer[pos] = *info;
2088 /* Otherwise, we need to add new node to buffer */
2089 else {
2090 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2091 (ctxt->node_seq.buffer == NULL)) {
2092 xmlParserNodeInfo *tmp_buffer;
2093 unsigned int byte_size;
2095 if (ctxt->node_seq.maximum == 0)
2096 ctxt->node_seq.maximum = 2;
2097 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2098 (2 * ctxt->node_seq.maximum));
2100 if (ctxt->node_seq.buffer == NULL)
2101 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2102 else
2103 tmp_buffer =
2104 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2105 byte_size);
2107 if (tmp_buffer == NULL) {
2108 xmlErrMemory(ctxt, "failed to allocate buffer\n");
2109 return;
2111 ctxt->node_seq.buffer = tmp_buffer;
2112 ctxt->node_seq.maximum *= 2;
2115 /* If position is not at end, move elements out of the way */
2116 if (pos != ctxt->node_seq.length) {
2117 unsigned long i;
2119 for (i = ctxt->node_seq.length; i > pos; i--)
2120 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2123 /* Copy element and increase length */
2124 ctxt->node_seq.buffer[pos] = *info;
2125 ctxt->node_seq.length++;
2129 /************************************************************************
2131 * Defaults settings *
2133 ************************************************************************/
2135 * xmlPedanticParserDefault:
2136 * @val: int 0 or 1
2138 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2140 * Set and return the previous value for enabling pedantic warnings.
2142 * Returns the last value for 0 for no substitution, 1 for substitution.
2146 xmlPedanticParserDefault(int val) {
2147 int old = xmlPedanticParserDefaultValue;
2149 xmlPedanticParserDefaultValue = val;
2150 return(old);
2154 * xmlLineNumbersDefault:
2155 * @val: int 0 or 1
2157 * DEPRECATED: The modern options API always enables line numbers.
2159 * Set and return the previous value for enabling line numbers in elements
2160 * contents. This may break on old application and is turned off by default.
2162 * Returns the last value for 0 for no substitution, 1 for substitution.
2166 xmlLineNumbersDefault(int val) {
2167 int old = xmlLineNumbersDefaultValue;
2169 xmlLineNumbersDefaultValue = val;
2170 return(old);
2174 * xmlSubstituteEntitiesDefault:
2175 * @val: int 0 or 1
2177 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2179 * Set and return the previous value for default entity support.
2180 * Initially the parser always keep entity references instead of substituting
2181 * entity values in the output. This function has to be used to change the
2182 * default parser behavior
2183 * SAX::substituteEntities() has to be used for changing that on a file by
2184 * file basis.
2186 * Returns the last value for 0 for no substitution, 1 for substitution.
2190 xmlSubstituteEntitiesDefault(int val) {
2191 int old = xmlSubstituteEntitiesDefaultValue;
2193 xmlSubstituteEntitiesDefaultValue = val;
2194 return(old);
2198 * xmlKeepBlanksDefault:
2199 * @val: int 0 or 1
2201 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2203 * Set and return the previous value for default blanks text nodes support.
2204 * The 1.x version of the parser used an heuristic to try to detect
2205 * ignorable white spaces. As a result the SAX callback was generating
2206 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2207 * using the DOM output text nodes containing those blanks were not generated.
2208 * The 2.x and later version will switch to the XML standard way and
2209 * ignorableWhitespace() are only generated when running the parser in
2210 * validating mode and when the current element doesn't allow CDATA or
2211 * mixed content.
2212 * This function is provided as a way to force the standard behavior
2213 * on 1.X libs and to switch back to the old mode for compatibility when
2214 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2215 * by using xmlIsBlankNode() commodity function to detect the "empty"
2216 * nodes generated.
2217 * This value also affect autogeneration of indentation when saving code
2218 * if blanks sections are kept, indentation is not generated.
2220 * Returns the last value for 0 for no substitution, 1 for substitution.
2224 xmlKeepBlanksDefault(int val) {
2225 int old = xmlKeepBlanksDefaultValue;
2227 xmlKeepBlanksDefaultValue = val;
2228 if (!val) xmlIndentTreeOutput = 1;
2229 return(old);