/*
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 */
/*
 * Directory separator of the host platform. The two definitions must be
 * mutually exclusive, otherwise the macro is redefined.
 * NOTE(review): the exact platform condition was lost in SOURCE; the usual
 * native-Windows test is assumed here - confirm against the build setup.
 */
#if defined(_WIN32) && !defined(__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
#include <stdio.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
#include <libxml/dict.h>
#include <libxml/SAX.h>
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
#include <libxml/globals.h>
#include <libxml/chvalid.h>
/*
 * Shorthand accessors for the current input of a parser context.
 * The argument is parenthesised so that any pointer expression can be
 * passed safely.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read cursor has not moved past the end of the input. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
/*
 * Various global defaults for parsing
 */
55 * @version: the include version number
57 * check the compiled lib version against the include one.
58 * This can warn or immediately kill the application
61 xmlCheckVersion(int version
) {
62 int myversion
= (int) LIBXML_VERSION
;
66 if ((myversion
/ 10000) != (version
/ 10000)) {
67 xmlGenericError(xmlGenericErrorContext
,
68 "Fatal: program compiled against libxml %d using libxml %d\n",
69 (version
/ 10000), (myversion
/ 10000));
71 "Fatal: program compiled against libxml %d using libxml %d\n",
72 (version
/ 10000), (myversion
/ 10000));
74 if ((myversion
/ 100) < (version
/ 100)) {
75 xmlGenericError(xmlGenericErrorContext
,
76 "Warning: program compiled against libxml %d using older %d\n",
77 (version
/ 100), (myversion
/ 100));
/************************************************************************
 *                                                                      *
 *              Some factorized error routines                          *
 *                                                                      *
 ************************************************************************/
91 * @ctxt: an XML parser context
92 * @extra: extra information
94 * Handle a redefinition of attribute error
97 xmlErrMemory(xmlParserCtxtPtr ctxt
, const char *extra
)
99 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
100 (ctxt
->instate
== XML_PARSER_EOF
))
103 ctxt
->errNo
= XML_ERR_NO_MEMORY
;
104 ctxt
->instate
= XML_PARSER_EOF
;
105 ctxt
->disableSAX
= 1;
108 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
109 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0, extra
,
111 "Memory allocation failed : %s\n", extra
);
113 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
114 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0, NULL
,
115 NULL
, NULL
, 0, 0, "Memory allocation failed\n");
120 * @ctxt: an XML parser context
121 * @xmlerr: the error number
122 * @msg: the error message
123 * @str1: an string info
124 * @str2: an string info
126 * Handle an encoding error
129 __xmlErrEncoding(xmlParserCtxtPtr ctxt
, xmlParserErrors xmlerr
,
130 const char *msg
, const xmlChar
* str1
, const xmlChar
* str2
)
132 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
133 (ctxt
->instate
== XML_PARSER_EOF
))
136 ctxt
->errNo
= xmlerr
;
137 __xmlRaiseError(NULL
, NULL
, NULL
,
138 ctxt
, NULL
, XML_FROM_PARSER
, xmlerr
, XML_ERR_FATAL
,
139 NULL
, 0, (const char *) str1
, (const char *) str2
,
140 NULL
, 0, 0, msg
, str1
, str2
);
142 ctxt
->wellFormed
= 0;
143 if (ctxt
->recovery
== 0)
144 ctxt
->disableSAX
= 1;
150 * @ctxt: an XML parser context
151 * @msg: the error message
152 * @str: error information
154 * Handle an internal error
156 static void LIBXML_ATTR_FORMAT(2,0)
157 xmlErrInternal(xmlParserCtxtPtr ctxt
, const char *msg
, const xmlChar
* str
)
159 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
160 (ctxt
->instate
== XML_PARSER_EOF
))
163 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
164 __xmlRaiseError(NULL
, NULL
, NULL
,
165 ctxt
, NULL
, XML_FROM_PARSER
, XML_ERR_INTERNAL_ERROR
,
166 XML_ERR_FATAL
, NULL
, 0, (const char *) str
, NULL
, NULL
,
169 ctxt
->wellFormed
= 0;
170 if (ctxt
->recovery
== 0)
171 ctxt
->disableSAX
= 1;
177 * @ctxt: an XML parser context
178 * @error: the error number
179 * @msg: the error message
180 * @val: an integer value
184 static void LIBXML_ATTR_FORMAT(3,0)
185 xmlErrEncodingInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
186 const char *msg
, int val
)
188 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
189 (ctxt
->instate
== XML_PARSER_EOF
))
193 __xmlRaiseError(NULL
, NULL
, NULL
,
194 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
195 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
197 ctxt
->wellFormed
= 0;
198 if (ctxt
->recovery
== 0)
199 ctxt
->disableSAX
= 1;
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
}
/************************************************************************
 *                                                                      *
 *              Input handling functions for progressive parsing        *
 *                                                                      *
 ************************************************************************/

/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper: verify that the base/cur pointers of @in are
 * consistent with the underlying buffer and dump the buffer state
 * through the generic error handler.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the offsets with explicit long
     * casts: casting a pointer to int truncates on 64-bit targets and
     * passing pointer differences or sizes to %d is undefined behaviour.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (const void *) in,
            (const void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
259 * xmlParserInputRead:
260 * @in: an XML parser input
261 * @len: an indicative size for the lookahead
263 * This function was internal and is deprecated.
265 * Returns -1 as this is an error to use it.
268 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED
, int len ATTRIBUTE_UNUSED
) {
273 * xmlParserInputGrow:
274 * @in: an XML parser input
275 * @len: an indicative size for the lookahead
277 * This function increase the input for the parser. It tries to
278 * preserve pointers to the input buffer, and keep already read data
280 * Returns the amount of char read, or -1 in case of error, 0 indicate the
284 xmlParserInputGrow(xmlParserInputPtr in
, int len
) {
288 if ((in
== NULL
) || (len
< 0)) return(-1);
290 xmlGenericError(xmlGenericErrorContext
, "Grow\n");
292 if (in
->buf
== NULL
) return(-1);
293 if (in
->base
== NULL
) return(-1);
294 if (in
->cur
== NULL
) return(-1);
295 if (in
->buf
->buffer
== NULL
) return(-1);
299 indx
= in
->cur
- in
->base
;
300 if (xmlBufUse(in
->buf
->buffer
) > (unsigned int) indx
+ INPUT_CHUNK
) {
306 if (in
->buf
->readcallback
!= NULL
) {
307 ret
= xmlParserInputBufferGrow(in
->buf
, len
);
311 in
->base
= xmlBufContent(in
->buf
->buffer
);
312 in
->cur
= in
->base
+ indx
;
313 in
->end
= xmlBufEnd(in
->buf
->buffer
);
321 * xmlParserInputShrink:
322 * @in: an XML parser input
324 * This function removes used input for the parser.
327 xmlParserInputShrink(xmlParserInputPtr in
) {
332 xmlGenericError(xmlGenericErrorContext
, "Shrink\n");
334 if (in
== NULL
) return;
335 if (in
->buf
== NULL
) return;
336 if (in
->base
== NULL
) return;
337 if (in
->cur
== NULL
) return;
338 if (in
->buf
->buffer
== NULL
) return;
342 used
= in
->cur
- in
->base
;
344 * Do not shrink on large buffers whose only a tiny fraction
347 if (used
> INPUT_CHUNK
) {
348 ret
= xmlBufShrink(in
->buf
->buffer
, used
- LINE_LEN
);
355 if (xmlBufUse(in
->buf
->buffer
) <= INPUT_CHUNK
) {
356 xmlParserInputBufferRead(in
->buf
, 2 * INPUT_CHUNK
);
359 in
->base
= xmlBufContent(in
->buf
->buffer
);
360 in
->cur
= in
->base
+ used
;
361 in
->end
= xmlBufEnd(in
->buf
->buffer
);
/************************************************************************
 *                                                                      *
 *              UTF8 character input and related functions              *
 *                                                                      *
 ************************************************************************/
374 * @ctxt: the XML parser context
376 * Skip to the next char input char.
380 xmlNextChar(xmlParserCtxtPtr ctxt
)
382 if ((ctxt
== NULL
) || (ctxt
->instate
== XML_PARSER_EOF
) ||
383 (ctxt
->input
== NULL
))
386 if (!(VALID_CTXT(ctxt
))) {
387 xmlErrInternal(ctxt
, "Parser input data memory error\n", NULL
);
388 ctxt
->errNo
= XML_ERR_INTERNAL_ERROR
;
393 if ((*ctxt
->input
->cur
== 0) &&
394 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0)) {
398 if (ctxt
->charset
== XML_CHAR_ENCODING_UTF8
) {
399 const unsigned char *cur
;
403 * 2.11 End-of-Line Handling
404 * the literal two-character sequence "#xD#xA" or a standalone
405 * literal #xD, an XML processor must pass to the application
406 * the single character #xA.
408 if (*(ctxt
->input
->cur
) == '\n') {
409 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
414 * We are supposed to handle UTF8, check it's valid
415 * From rfc2044: encoding of the Unicode values on UTF-8:
417 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
418 * 0000 0000-0000 007F 0xxxxxxx
419 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
420 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
422 * Check for the 0x110000 limit too
424 cur
= ctxt
->input
->cur
;
431 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
432 cur
= ctxt
->input
->cur
;
434 if ((cur
[1] & 0xc0) != 0x80)
436 if ((c
& 0xe0) == 0xe0) {
440 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
441 cur
= ctxt
->input
->cur
;
443 if ((cur
[2] & 0xc0) != 0x80)
445 if ((c
& 0xf0) == 0xf0) {
447 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
448 cur
= ctxt
->input
->cur
;
450 if (((c
& 0xf8) != 0xf0) ||
451 ((cur
[3] & 0xc0) != 0x80))
454 ctxt
->input
->cur
+= 4;
455 val
= (cur
[0] & 0x7) << 18;
456 val
|= (cur
[1] & 0x3f) << 12;
457 val
|= (cur
[2] & 0x3f) << 6;
458 val
|= cur
[3] & 0x3f;
461 ctxt
->input
->cur
+= 3;
462 val
= (cur
[0] & 0xf) << 12;
463 val
|= (cur
[1] & 0x3f) << 6;
464 val
|= cur
[2] & 0x3f;
466 if (((val
> 0xd7ff) && (val
< 0xe000)) ||
467 ((val
> 0xfffd) && (val
< 0x10000)) ||
469 xmlErrEncodingInt(ctxt
, XML_ERR_INVALID_CHAR
,
470 "Char 0x%X out of allowed range\n",
475 ctxt
->input
->cur
+= 2;
481 * Assume it's a fixed length encoding (1) with
482 * a compatible encoding for the ASCII set, since
483 * XML constructs only use < 128 chars
486 if (*(ctxt
->input
->cur
) == '\n') {
487 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
492 if (*ctxt
->input
->cur
== 0)
493 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
497 * If we detect an UTF8 error that probably mean that the
498 * input encoding didn't get properly advertised in the
499 * declaration header. Report the error and switch the encoding
500 * to ISO-Latin-1 (if you don't like this policy, just declare the
503 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
) ||
504 (ctxt
->input
->end
- ctxt
->input
->cur
< 4)) {
505 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
506 "Input is not proper UTF-8, indicate encoding !\n",
511 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
512 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
513 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
514 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
515 "Input is not proper UTF-8, indicate encoding !\n%s",
516 BAD_CAST buffer
, NULL
);
518 ctxt
->charset
= XML_CHAR_ENCODING_8859_1
;
525 * @ctxt: the XML parser context
526 * @len: pointer to the length of the char read
528 * The current char value, if using UTF-8 this may actually span multiple
529 * bytes in the input buffer. Implement the end of line normalization:
530 * 2.11 End-of-Line Handling
531 * Wherever an external parsed entity or the literal entity value
532 * of an internal parsed entity contains either the literal two-character
533 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
534 * must pass to the application the single character #xA.
535 * This behavior can conveniently be produced by normalizing all
536 * line breaks to #xA on input, before parsing.)
538 * Returns the current char value and its length
542 xmlCurrentChar(xmlParserCtxtPtr ctxt
, int *len
) {
543 if ((ctxt
== NULL
) || (len
== NULL
) || (ctxt
->input
== NULL
)) return(0);
544 if (ctxt
->instate
== XML_PARSER_EOF
)
547 if ((*ctxt
->input
->cur
>= 0x20) && (*ctxt
->input
->cur
<= 0x7F)) {
549 return((int) *ctxt
->input
->cur
);
551 if (ctxt
->charset
== XML_CHAR_ENCODING_UTF8
) {
553 * We are supposed to handle UTF8, check it's valid
554 * From rfc2044: encoding of the Unicode values on UTF-8:
556 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
557 * 0000 0000-0000 007F 0xxxxxxx
558 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
559 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
561 * Check for the 0x110000 limit too
563 const unsigned char *cur
= ctxt
->input
->cur
;
569 if (((c
& 0x40) == 0) || (c
== 0xC0))
572 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
573 cur
= ctxt
->input
->cur
;
575 if ((cur
[1] & 0xc0) != 0x80)
577 if ((c
& 0xe0) == 0xe0) {
579 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
580 cur
= ctxt
->input
->cur
;
582 if ((cur
[2] & 0xc0) != 0x80)
584 if ((c
& 0xf0) == 0xf0) {
586 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
587 cur
= ctxt
->input
->cur
;
589 if (((c
& 0xf8) != 0xf0) ||
590 ((cur
[3] & 0xc0) != 0x80))
594 val
= (cur
[0] & 0x7) << 18;
595 val
|= (cur
[1] & 0x3f) << 12;
596 val
|= (cur
[2] & 0x3f) << 6;
597 val
|= cur
[3] & 0x3f;
603 val
= (cur
[0] & 0xf) << 12;
604 val
|= (cur
[1] & 0x3f) << 6;
605 val
|= cur
[2] & 0x3f;
612 val
= (cur
[0] & 0x1f) << 6;
613 val
|= cur
[1] & 0x3f;
618 xmlErrEncodingInt(ctxt
, XML_ERR_INVALID_CHAR
,
619 "Char 0x%X out of allowed range\n", val
);
625 if (*ctxt
->input
->cur
== 0)
626 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
627 if ((*ctxt
->input
->cur
== 0) &&
628 (ctxt
->input
->end
> ctxt
->input
->cur
)) {
629 xmlErrEncodingInt(ctxt
, XML_ERR_INVALID_CHAR
,
630 "Char 0x0 out of allowed range\n", 0);
632 if (*ctxt
->input
->cur
== 0xD) {
633 if (ctxt
->input
->cur
[1] == 0xA) {
638 return((int) *ctxt
->input
->cur
);
642 * Assume it's a fixed length encoding (1) with
643 * a compatible encoding for the ASCII set, since
644 * XML constructs only use < 128 chars
647 if (*ctxt
->input
->cur
== 0xD) {
648 if (ctxt
->input
->cur
[1] == 0xA) {
653 return((int) *ctxt
->input
->cur
);
656 * An encoding problem may arise from a truncated input buffer
657 * splitting a character in the middle. In that case do not raise
658 * an error but return 0 to indicate an end of stream problem
660 if (ctxt
->input
->end
- ctxt
->input
->cur
< 4) {
666 * If we detect an UTF8 error that probably mean that the
667 * input encoding didn't get properly advertised in the
668 * declaration header. Report the error and switch the encoding
669 * to ISO-Latin-1 (if you don't like this policy, just declare the
675 snprintf(&buffer
[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
676 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
677 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
678 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
679 "Input is not proper UTF-8, indicate encoding !\n%s",
680 BAD_CAST buffer
, NULL
);
682 ctxt
->charset
= XML_CHAR_ENCODING_8859_1
;
684 return((int) *ctxt
->input
->cur
);
688 * xmlStringCurrentChar:
689 * @ctxt: the XML parser context
690 * @cur: pointer to the beginning of the char
691 * @len: pointer to the length of the char read
693 * The current char value, if using UTF-8 this may actually span multiple
694 * bytes in the input buffer.
696 * Returns the current char value and its length
700 xmlStringCurrentChar(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
, int *len
)
702 if ((len
== NULL
) || (cur
== NULL
)) return(0);
703 if ((ctxt
== NULL
) || (ctxt
->charset
== XML_CHAR_ENCODING_UTF8
)) {
705 * We are supposed to handle UTF8, check it's valid
706 * From rfc2044: encoding of the Unicode values on UTF-8:
708 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
709 * 0000 0000-0000 007F 0xxxxxxx
710 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
711 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
713 * Check for the 0x110000 limit too
720 if ((cur
[1] & 0xc0) != 0x80)
722 if ((c
& 0xe0) == 0xe0) {
724 if ((cur
[2] & 0xc0) != 0x80)
726 if ((c
& 0xf0) == 0xf0) {
727 if (((c
& 0xf8) != 0xf0) || ((cur
[3] & 0xc0) != 0x80))
731 val
= (cur
[0] & 0x7) << 18;
732 val
|= (cur
[1] & 0x3f) << 12;
733 val
|= (cur
[2] & 0x3f) << 6;
734 val
|= cur
[3] & 0x3f;
738 val
= (cur
[0] & 0xf) << 12;
739 val
|= (cur
[1] & 0x3f) << 6;
740 val
|= cur
[2] & 0x3f;
745 val
= (cur
[0] & 0x1f) << 6;
746 val
|= cur
[1] & 0x3f;
749 xmlErrEncodingInt(ctxt
, XML_ERR_INVALID_CHAR
,
750 "Char 0x%X out of allowed range\n", val
);
760 * Assume it's a fixed length encoding (1) with
761 * a compatible encoding for the ASCII set, since
762 * XML constructs only use < 128 chars
769 * An encoding problem may arise from a truncated input buffer
770 * splitting a character in the middle. In that case do not raise
771 * an error but return 0 to indicate an end of stream problem
773 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
) ||
774 (ctxt
->input
->end
- ctxt
->input
->cur
< 4)) {
779 * If we detect an UTF8 error that probably mean that the
780 * input encoding didn't get properly advertised in the
781 * declaration header. Report the error and switch the encoding
782 * to ISO-Latin-1 (if you don't like this policy, just declare the
788 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
789 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
790 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
791 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
792 "Input is not proper UTF-8, indicate encoding !\n%s",
793 BAD_CAST buffer
, NULL
);
800 * xmlCopyCharMultiByte:
801 * @out: pointer to an array of xmlChar
802 * @val: the char value
804 * append the char value in the array
806 * Returns the number of xmlChar written
809 xmlCopyCharMultiByte(xmlChar
*out
, int val
) {
810 if (out
== NULL
) return(0);
812 * We are supposed to handle UTF8, check it's valid
813 * From rfc2044: encoding of the Unicode values on UTF-8:
815 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
816 * 0000 0000-0000 007F 0xxxxxxx
817 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
818 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
821 xmlChar
*savedout
= out
;
823 if (val
< 0x800) { *out
++= (val
>> 6) | 0xC0; bits
= 0; }
824 else if (val
< 0x10000) { *out
++= (val
>> 12) | 0xE0; bits
= 6;}
825 else if (val
< 0x110000) { *out
++= (val
>> 18) | 0xF0; bits
= 12; }
827 xmlErrEncodingInt(NULL
, XML_ERR_INVALID_CHAR
,
828 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
832 for ( ; bits
>= 0; bits
-= 6)
833 *out
++= ((val
>> bits
) & 0x3F) | 0x80 ;
834 return (out
- savedout
);
836 *out
= (xmlChar
) val
;
842 * @len: Ignored, compatibility
843 * @out: pointer to an array of xmlChar
844 * @val: the char value
846 * append the char value in the array
848 * Returns the number of xmlChar written
852 xmlCopyChar(int len ATTRIBUTE_UNUSED
, xmlChar
*out
, int val
) {
853 if (out
== NULL
) return(0);
854 /* the len parameter is ignored */
856 return(xmlCopyCharMultiByte (out
, val
));
858 *out
= (xmlChar
) val
;
/************************************************************************
 *                                                                      *
 *              Commodity functions to switch encodings                 *
 *                                                                      *
 ************************************************************************/
869 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
,
870 xmlCharEncodingHandlerPtr handler
, int len
);
873 * @ctxt: the parser context
874 * @enc: the encoding value (number)
876 * change the input functions when discovering the character encoding
879 * Returns 0 in case of success, -1 otherwise
882 xmlSwitchEncoding(xmlParserCtxtPtr ctxt
, xmlCharEncoding enc
)
884 xmlCharEncodingHandlerPtr handler
;
888 if (ctxt
== NULL
) return(-1);
890 case XML_CHAR_ENCODING_ERROR
:
891 __xmlErrEncoding(ctxt
, XML_ERR_UNKNOWN_ENCODING
,
892 "encoding unknown\n", NULL
, NULL
);
894 case XML_CHAR_ENCODING_NONE
:
895 /* let's assume it's UTF-8 without the XML decl */
896 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
898 case XML_CHAR_ENCODING_UTF8
:
899 /* default encoding, no conversion should be needed */
900 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
903 * Errata on XML-1.0 June 20 2001
904 * Specific handling of the Byte Order Mark for
907 if ((ctxt
->input
!= NULL
) &&
908 (ctxt
->input
->cur
[0] == 0xEF) &&
909 (ctxt
->input
->cur
[1] == 0xBB) &&
910 (ctxt
->input
->cur
[2] == 0xBF)) {
911 ctxt
->input
->cur
+= 3;
914 case XML_CHAR_ENCODING_UTF16LE
:
915 case XML_CHAR_ENCODING_UTF16BE
:
916 /*The raw input characters are encoded
917 *in UTF-16. As we expect this function
918 *to be called after xmlCharEncInFunc, we expect
919 *ctxt->input->cur to contain UTF-8 encoded characters.
920 *So the raw UTF16 Byte Order Mark
921 *has also been converted into
922 *an UTF-8 BOM. Let's skip that BOM.
924 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->cur
!= NULL
) &&
925 (ctxt
->input
->cur
[0] == 0xEF) &&
926 (ctxt
->input
->cur
[1] == 0xBB) &&
927 (ctxt
->input
->cur
[2] == 0xBF)) {
928 ctxt
->input
->cur
+= 3;
932 case XML_CHAR_ENCODING_UCS2
:
935 case XML_CHAR_ENCODING_UCS4BE
:
936 case XML_CHAR_ENCODING_UCS4LE
:
937 case XML_CHAR_ENCODING_UCS4_2143
:
938 case XML_CHAR_ENCODING_UCS4_3412
:
941 case XML_CHAR_ENCODING_EBCDIC
:
942 case XML_CHAR_ENCODING_8859_1
:
943 case XML_CHAR_ENCODING_8859_2
:
944 case XML_CHAR_ENCODING_8859_3
:
945 case XML_CHAR_ENCODING_8859_4
:
946 case XML_CHAR_ENCODING_8859_5
:
947 case XML_CHAR_ENCODING_8859_6
:
948 case XML_CHAR_ENCODING_8859_7
:
949 case XML_CHAR_ENCODING_8859_8
:
950 case XML_CHAR_ENCODING_8859_9
:
951 case XML_CHAR_ENCODING_ASCII
:
952 case XML_CHAR_ENCODING_2022_JP
:
953 case XML_CHAR_ENCODING_SHIFT_JIS
:
954 case XML_CHAR_ENCODING_EUC_JP
:
958 handler
= xmlGetCharEncodingHandler(enc
);
959 if (handler
== NULL
) {
964 case XML_CHAR_ENCODING_ASCII
:
965 /* default encoding, no conversion should be needed */
966 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
968 case XML_CHAR_ENCODING_8859_1
:
969 if ((ctxt
->inputNr
== 1) &&
970 (ctxt
->encoding
== NULL
) &&
971 (ctxt
->input
!= NULL
) &&
972 (ctxt
->input
->encoding
!= NULL
)) {
973 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
978 __xmlErrEncoding(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
979 "encoding not supported: %s\n",
980 BAD_CAST
xmlGetCharEncodingName(enc
), NULL
);
982 * TODO: We could recover from errors in external entities
983 * if we didn't stop the parser. But most callers of this
984 * function don't check the return value.
990 ret
= xmlSwitchInputEncodingInt(ctxt
, ctxt
->input
, handler
, len
);
991 if ((ret
< 0) || (ctxt
->errNo
== XML_I18N_CONV_FAILED
)) {
993 * on encoding conversion errors, stop the parser
996 ctxt
->errNo
= XML_I18N_CONV_FAILED
;
1002 * xmlSwitchInputEncodingInt:
1003 * @ctxt: the parser context
1004 * @input: the input stream
1005 * @handler: the encoding handler
1006 * @len: the number of bytes to convert for the first line or -1
1008 * change the input functions when discovering the character encoding
1009 * of a given entity.
1011 * Returns 0 in case of success, -1 otherwise
1014 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
,
1015 xmlCharEncodingHandlerPtr handler
, int len
)
1019 if (handler
== NULL
)
1023 if (input
->buf
!= NULL
) {
1024 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1026 if (input
->buf
->encoder
!= NULL
) {
1028 * Check in case the auto encoding detection triggered
1031 if (input
->buf
->encoder
== handler
)
1035 * "UTF-16" can be used for both LE and BE
1036 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1037 BAD_CAST "UTF-16", 6)) &&
1038 (!xmlStrncmp(BAD_CAST handler->name,
1039 BAD_CAST "UTF-16", 6))) {
1045 * Note: this is a bit dangerous, but that's what it
1046 * takes to use nearly compatible signature for different
1049 * FIXME: Encoders might buffer partial byte sequences, so
1050 * this probably can't work. We should return an error and
1051 * make sure that callers never try to switch the encoding
1054 xmlCharEncCloseFunc(input
->buf
->encoder
);
1055 input
->buf
->encoder
= handler
;
1058 input
->buf
->encoder
= handler
;
1061 * Is there already some content down the pipe to convert ?
1063 if (xmlBufIsEmpty(input
->buf
->buffer
) == 0) {
1068 * Specific handling of the Byte Order Mark for
1071 if ((handler
->name
!= NULL
) &&
1072 (!strcmp(handler
->name
, "UTF-16LE") ||
1073 !strcmp(handler
->name
, "UTF-16")) &&
1074 (input
->cur
[0] == 0xFF) && (input
->cur
[1] == 0xFE)) {
1077 if ((handler
->name
!= NULL
) &&
1078 (!strcmp(handler
->name
, "UTF-16BE")) &&
1079 (input
->cur
[0] == 0xFE) && (input
->cur
[1] == 0xFF)) {
1083 * Errata on XML-1.0 June 20 2001
1084 * Specific handling of the Byte Order Mark for
1087 if ((handler
->name
!= NULL
) &&
1088 (!strcmp(handler
->name
, "UTF-8")) &&
1089 (input
->cur
[0] == 0xEF) &&
1090 (input
->cur
[1] == 0xBB) && (input
->cur
[2] == 0xBF)) {
1095 * Shrink the current input buffer.
1096 * Move it as the raw buffer and create a new input buffer
1098 processed
= input
->cur
- input
->base
;
1099 xmlBufShrink(input
->buf
->buffer
, processed
);
1100 input
->buf
->raw
= input
->buf
->buffer
;
1101 input
->buf
->buffer
= xmlBufCreate();
1102 input
->buf
->rawconsumed
= processed
;
1103 use
= xmlBufUse(input
->buf
->raw
);
1107 * convert as much as possible of the buffer
1109 nbchars
= xmlCharEncInput(input
->buf
, 1);
1112 * convert just enough to get
1113 * '<?xml version="1.0" encoding="xxx"?>'
1114 * parsed with the autodetected encoding
1115 * into the parser reading buffer.
1117 nbchars
= xmlCharEncFirstLineInput(input
->buf
, len
);
1119 xmlBufResetInput(input
->buf
->buffer
, input
);
1121 xmlErrInternal(ctxt
,
1122 "switching encoding: encoder error\n",
1126 input
->buf
->rawconsumed
+= use
- xmlBufUse(input
->buf
->raw
);
1130 xmlErrInternal(ctxt
,
1131 "static memory buffer doesn't support encoding\n", NULL
);
1133 * Callers assume that the input buffer takes ownership of the
1134 * encoding handler. xmlCharEncCloseFunc frees unregistered
1135 * handlers and avoids a memory leak.
1137 xmlCharEncCloseFunc(handler
);
1143 * xmlSwitchInputEncoding:
1144 * @ctxt: the parser context
1145 * @input: the input stream
1146 * @handler: the encoding handler
1148 * DEPRECATED: Use xmlSwitchToEncoding
1150 * change the input functions when discovering the character encoding
1151 * of a given entity.
1153 * Returns 0 in case of success, -1 otherwise
1156 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
,
1157 xmlCharEncodingHandlerPtr handler
) {
1158 return(xmlSwitchInputEncodingInt(ctxt
, input
, handler
, -1));
1162 * xmlSwitchToEncoding:
1163 * @ctxt: the parser context
1164 * @handler: the encoding handler
1166 * change the input functions when discovering the character encoding
1167 * of a given entity.
1169 * Returns 0 in case of success, -1 otherwise
1172 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt
, xmlCharEncodingHandlerPtr handler
)
1176 return(xmlSwitchInputEncodingInt(ctxt
, ctxt
->input
, handler
, -1));
/************************************************************************
 *                                                                      *
 *      Commodity functions to handle entities processing               *
 *                                                                      *
 ************************************************************************/
1186 * xmlFreeInputStream:
1187 * @input: an xmlParserInputPtr
1189 * Free up an input stream.
1192 xmlFreeInputStream(xmlParserInputPtr input
) {
1193 if (input
== NULL
) return;
1195 if (input
->filename
!= NULL
) xmlFree((char *) input
->filename
);
1196 if (input
->directory
!= NULL
) xmlFree((char *) input
->directory
);
1197 if (input
->encoding
!= NULL
) xmlFree((char *) input
->encoding
);
1198 if (input
->version
!= NULL
) xmlFree((char *) input
->version
);
1199 if ((input
->free
!= NULL
) && (input
->base
!= NULL
))
1200 input
->free((xmlChar
*) input
->base
);
1201 if (input
->buf
!= NULL
)
1202 xmlFreeParserInputBuffer(input
->buf
);
1207 * xmlNewInputStream:
1208 * @ctxt: an XML parser context
1210 * Create a new input stream structure.
1212 * Returns the new input stream or NULL
1215 xmlNewInputStream(xmlParserCtxtPtr ctxt
) {
1216 xmlParserInputPtr input
;
1218 input
= (xmlParserInputPtr
) xmlMalloc(sizeof(xmlParserInput
));
1219 if (input
== NULL
) {
1220 xmlErrMemory(ctxt
, "couldn't allocate a new input stream\n");
1223 memset(input
, 0, sizeof(xmlParserInput
));
1226 input
->standalone
= -1;
1229 * If the context is NULL the id cannot be initialized, but that
1230 * should not happen while parsing which is the situation where
1231 * the id is actually needed.
1234 input
->id
= ctxt
->input_id
++;
1240 * xmlNewIOInputStream:
1241 * @ctxt: an XML parser context
1242 * @input: an I/O Input
1243 * @enc: the charset encoding if known
1245 * Create a new input stream structure encapsulating the @input into
1246 * a stream suitable for the parser.
1248 * Returns the new input stream or NULL
1251 xmlNewIOInputStream(xmlParserCtxtPtr ctxt
, xmlParserInputBufferPtr input
,
1252 xmlCharEncoding enc
) {
1253 xmlParserInputPtr inputStream
;
1255 if (input
== NULL
) return(NULL
);
1256 if (xmlParserDebugEntities
)
1257 xmlGenericError(xmlGenericErrorContext
, "new input from I/O\n");
1258 inputStream
= xmlNewInputStream(ctxt
);
1259 if (inputStream
== NULL
) {
1262 inputStream
->filename
= NULL
;
1263 inputStream
->buf
= input
;
1264 xmlBufResetInput(inputStream
->buf
->buffer
, inputStream
);
1266 if (enc
!= XML_CHAR_ENCODING_NONE
) {
1267 xmlSwitchEncoding(ctxt
, enc
);
1270 return(inputStream
);
1274 * xmlNewEntityInputStream:
1275 * @ctxt: an XML parser context
1276 * @entity: an Entity pointer
1278 * Create a new input stream based on an xmlEntityPtr
1280 * Returns the new input stream or NULL
1283 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
1284 xmlParserInputPtr input
;
1286 if (entity
== NULL
) {
1287 xmlErrInternal(ctxt
, "xmlNewEntityInputStream entity = NULL\n",
1291 if (xmlParserDebugEntities
)
1292 xmlGenericError(xmlGenericErrorContext
,
1293 "new input from entity: %s\n", entity
->name
);
1294 if (entity
->content
== NULL
) {
1295 switch (entity
->etype
) {
1296 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
:
1297 xmlErrInternal(ctxt
, "Cannot parse entity %s\n",
1300 case XML_EXTERNAL_GENERAL_PARSED_ENTITY
:
1301 case XML_EXTERNAL_PARAMETER_ENTITY
:
1302 return(xmlLoadExternalEntity((char *) entity
->URI
,
1303 (char *) entity
->ExternalID
, ctxt
));
1304 case XML_INTERNAL_GENERAL_ENTITY
:
1305 xmlErrInternal(ctxt
,
1306 "Internal entity %s without content !\n",
1309 case XML_INTERNAL_PARAMETER_ENTITY
:
1310 xmlErrInternal(ctxt
,
1311 "Internal parameter entity %s without content !\n",
1314 case XML_INTERNAL_PREDEFINED_ENTITY
:
1315 xmlErrInternal(ctxt
,
1316 "Predefined entity %s without content !\n",
1322 input
= xmlNewInputStream(ctxt
);
1323 if (input
== NULL
) {
1326 if (entity
->URI
!= NULL
)
1327 input
->filename
= (char *) xmlStrdup((xmlChar
*) entity
->URI
);
1328 input
->base
= entity
->content
;
1329 if (entity
->length
== 0)
1330 entity
->length
= xmlStrlen(entity
->content
);
1331 input
->cur
= entity
->content
;
1332 input
->length
= entity
->length
;
1333 input
->end
= &entity
->content
[input
->length
];
1338 * xmlNewStringInputStream:
1339 * @ctxt: an XML parser context
1340 * @buffer: an memory buffer
1342 * Create a new input stream based on a memory buffer.
1343 * Returns the new input stream
1346 xmlNewStringInputStream(xmlParserCtxtPtr ctxt
, const xmlChar
*buffer
) {
1347 xmlParserInputPtr input
;
1349 if (buffer
== NULL
) {
1350 xmlErrInternal(ctxt
, "xmlNewStringInputStream string = NULL\n",
1354 if (xmlParserDebugEntities
)
1355 xmlGenericError(xmlGenericErrorContext
,
1356 "new fixed input: %.30s\n", buffer
);
1357 input
= xmlNewInputStream(ctxt
);
1358 if (input
== NULL
) {
1359 xmlErrMemory(ctxt
, "couldn't allocate a new input stream\n");
1362 input
->base
= buffer
;
1363 input
->cur
= buffer
;
1364 input
->length
= xmlStrlen(buffer
);
1365 input
->end
= &buffer
[input
->length
];
1370 * xmlNewInputFromFile:
1371 * @ctxt: an XML parser context
1372 * @filename: the filename to use as entity
1374 * Create a new input stream based on a file or an URL.
1376 * Returns the new input stream or NULL in case of error
1379 xmlNewInputFromFile(xmlParserCtxtPtr ctxt
, const char *filename
) {
1380 xmlParserInputBufferPtr buf
;
1381 xmlParserInputPtr inputStream
;
1382 char *directory
= NULL
;
1383 xmlChar
*URI
= NULL
;
1385 if (xmlParserDebugEntities
)
1386 xmlGenericError(xmlGenericErrorContext
,
1387 "new input from file: %s\n", filename
);
1388 if (ctxt
== NULL
) return(NULL
);
1389 buf
= xmlParserInputBufferCreateFilename(filename
, XML_CHAR_ENCODING_NONE
);
1391 if (filename
== NULL
)
1392 __xmlLoaderErr(ctxt
,
1393 "failed to load external entity: NULL filename \n",
1396 __xmlLoaderErr(ctxt
, "failed to load external entity \"%s\"\n",
1397 (const char *) filename
);
1401 inputStream
= xmlNewInputStream(ctxt
);
1402 if (inputStream
== NULL
) {
1403 xmlFreeParserInputBuffer(buf
);
1407 inputStream
->buf
= buf
;
1408 inputStream
= xmlCheckHTTPInput(ctxt
, inputStream
);
1409 if (inputStream
== NULL
)
1412 if (inputStream
->filename
== NULL
)
1413 URI
= xmlStrdup((xmlChar
*) filename
);
1415 URI
= xmlStrdup((xmlChar
*) inputStream
->filename
);
1416 directory
= xmlParserGetDirectory((const char *) URI
);
1417 if (inputStream
->filename
!= NULL
) xmlFree((char *)inputStream
->filename
);
1418 inputStream
->filename
= (char *) xmlCanonicPath((const xmlChar
*) URI
);
1419 if (URI
!= NULL
) xmlFree((char *) URI
);
1420 inputStream
->directory
= directory
;
1422 xmlBufResetInput(inputStream
->buf
->buffer
, inputStream
);
1423 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
1424 ctxt
->directory
= (char *) xmlStrdup((const xmlChar
*) directory
);
1425 return(inputStream
);
1428 /************************************************************************
1430 * Commodity functions to handle parser contexts *
1432 ************************************************************************/
1435 * xmlInitParserCtxt:
1436 * @ctxt: an XML parser context
1438 * Initialize a parser context
1440 * Returns 0 in case of success and -1 in case of error
1444 xmlInitParserCtxt(xmlParserCtxtPtr ctxt
)
1446 xmlParserInputPtr input
;
1449 xmlErrInternal(NULL
, "Got NULL parser context\n", NULL
);
1455 if (ctxt
->dict
== NULL
)
1456 ctxt
->dict
= xmlDictCreate();
1457 if (ctxt
->dict
== NULL
) {
1458 xmlErrMemory(NULL
, "cannot initialize parser context\n");
1461 xmlDictSetLimit(ctxt
->dict
, XML_MAX_DICTIONARY_LIMIT
);
1463 if (ctxt
->sax
== NULL
)
1464 ctxt
->sax
= (xmlSAXHandler
*) xmlMalloc(sizeof(xmlSAXHandler
));
1465 if (ctxt
->sax
== NULL
) {
1466 xmlErrMemory(NULL
, "cannot initialize parser context\n");
1470 xmlSAXVersion(ctxt
->sax
, 2);
1474 /* Allocate the Input stack */
1475 if (ctxt
->inputTab
== NULL
) {
1476 ctxt
->inputTab
= (xmlParserInputPtr
*)
1477 xmlMalloc(5 * sizeof(xmlParserInputPtr
));
1480 if (ctxt
->inputTab
== NULL
) {
1481 xmlErrMemory(NULL
, "cannot initialize parser context\n");
1487 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
1488 xmlFreeInputStream(input
);
1493 ctxt
->version
= NULL
;
1494 ctxt
->encoding
= NULL
;
1495 ctxt
->standalone
= -1;
1496 ctxt
->hasExternalSubset
= 0;
1497 ctxt
->hasPErefs
= 0;
1500 ctxt
->instate
= XML_PARSER_START
;
1502 ctxt
->directory
= NULL
;
1504 /* Allocate the Node stack */
1505 if (ctxt
->nodeTab
== NULL
) {
1506 ctxt
->nodeTab
= (xmlNodePtr
*) xmlMalloc(10 * sizeof(xmlNodePtr
));
1509 if (ctxt
->nodeTab
== NULL
) {
1510 xmlErrMemory(NULL
, "cannot initialize parser context\n");
1522 /* Allocate the Name stack */
1523 if (ctxt
->nameTab
== NULL
) {
1524 ctxt
->nameTab
= (const xmlChar
**) xmlMalloc(10 * sizeof(xmlChar
*));
1527 if (ctxt
->nameTab
== NULL
) {
1528 xmlErrMemory(NULL
, "cannot initialize parser context\n");
1543 /* Allocate the space stack */
1544 if (ctxt
->spaceTab
== NULL
) {
1545 ctxt
->spaceTab
= (int *) xmlMalloc(10 * sizeof(int));
1546 ctxt
->spaceMax
= 10;
1548 if (ctxt
->spaceTab
== NULL
) {
1549 xmlErrMemory(NULL
, "cannot initialize parser context\n");
1565 ctxt
->spaceMax
= 10;
1566 ctxt
->spaceTab
[0] = -1;
1567 ctxt
->space
= &ctxt
->spaceTab
[0];
1568 ctxt
->userData
= ctxt
;
1570 ctxt
->wellFormed
= 1;
1571 ctxt
->nsWellFormed
= 1;
1573 ctxt
->loadsubset
= xmlLoadExtDtdDefaultValue
;
1574 if (ctxt
->loadsubset
) {
1575 ctxt
->options
|= XML_PARSE_DTDLOAD
;
1577 ctxt
->validate
= xmlDoValidityCheckingDefaultValue
;
1578 ctxt
->pedantic
= xmlPedanticParserDefaultValue
;
1579 if (ctxt
->pedantic
) {
1580 ctxt
->options
|= XML_PARSE_PEDANTIC
;
1582 ctxt
->linenumbers
= xmlLineNumbersDefaultValue
;
1583 ctxt
->keepBlanks
= xmlKeepBlanksDefaultValue
;
1584 if (ctxt
->keepBlanks
== 0) {
1585 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
1586 ctxt
->options
|= XML_PARSE_NOBLANKS
;
1589 ctxt
->vctxt
.flags
= XML_VCTXT_USE_PCTXT
;
1590 ctxt
->vctxt
.userData
= ctxt
;
1591 ctxt
->vctxt
.error
= xmlParserValidityError
;
1592 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
1593 if (ctxt
->validate
) {
1594 if (xmlGetWarningsDefaultValue
== 0)
1595 ctxt
->vctxt
.warning
= NULL
;
1597 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
1598 ctxt
->vctxt
.nodeMax
= 0;
1599 ctxt
->options
|= XML_PARSE_DTDVALID
;
1601 ctxt
->replaceEntities
= xmlSubstituteEntitiesDefaultValue
;
1602 if (ctxt
->replaceEntities
) {
1603 ctxt
->options
|= XML_PARSE_NOENT
;
1605 ctxt
->record_info
= 0;
1606 ctxt
->checkIndex
= 0;
1608 ctxt
->errNo
= XML_ERR_OK
;
1610 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
1611 ctxt
->catalogs
= NULL
;
1612 ctxt
->nbentities
= 0;
1613 ctxt
->sizeentities
= 0;
1614 ctxt
->sizeentcopy
= 0;
1616 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
1621 * xmlFreeParserCtxt:
1622 * @ctxt: an XML parser context
1624 * Free all the memory used by a parser context. However the parsed
1625 * document in ctxt->myDoc is not freed.
1629 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt
)
1631 xmlParserInputPtr input
;
1633 if (ctxt
== NULL
) return;
1635 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
1636 xmlFreeInputStream(input
);
1638 if (ctxt
->spaceTab
!= NULL
) xmlFree(ctxt
->spaceTab
);
1639 if (ctxt
->nameTab
!= NULL
) xmlFree((xmlChar
* *)ctxt
->nameTab
);
1640 if (ctxt
->nodeTab
!= NULL
) xmlFree(ctxt
->nodeTab
);
1641 if (ctxt
->nodeInfoTab
!= NULL
) xmlFree(ctxt
->nodeInfoTab
);
1642 if (ctxt
->inputTab
!= NULL
) xmlFree(ctxt
->inputTab
);
1643 if (ctxt
->version
!= NULL
) xmlFree((char *) ctxt
->version
);
1644 if (ctxt
->encoding
!= NULL
) xmlFree((char *) ctxt
->encoding
);
1645 if (ctxt
->extSubURI
!= NULL
) xmlFree((char *) ctxt
->extSubURI
);
1646 if (ctxt
->extSubSystem
!= NULL
) xmlFree((char *) ctxt
->extSubSystem
);
1647 #ifdef LIBXML_SAX1_ENABLED
1648 if ((ctxt
->sax
!= NULL
) &&
1649 (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
))
1651 if (ctxt
->sax
!= NULL
)
1652 #endif /* LIBXML_SAX1_ENABLED */
1654 if (ctxt
->directory
!= NULL
) xmlFree((char *) ctxt
->directory
);
1655 if (ctxt
->vctxt
.nodeTab
!= NULL
) xmlFree(ctxt
->vctxt
.nodeTab
);
1656 if (ctxt
->atts
!= NULL
) xmlFree((xmlChar
* *)ctxt
->atts
);
1657 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
1658 if (ctxt
->nsTab
!= NULL
) xmlFree((char *) ctxt
->nsTab
);
1659 if (ctxt
->pushTab
!= NULL
) xmlFree(ctxt
->pushTab
);
1660 if (ctxt
->attallocs
!= NULL
) xmlFree(ctxt
->attallocs
);
1661 if (ctxt
->attsDefault
!= NULL
)
1662 xmlHashFree(ctxt
->attsDefault
, xmlHashDefaultDeallocator
);
1663 if (ctxt
->attsSpecial
!= NULL
)
1664 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1665 if (ctxt
->freeElems
!= NULL
) {
1666 xmlNodePtr cur
, next
;
1668 cur
= ctxt
->freeElems
;
1669 while (cur
!= NULL
) {
1675 if (ctxt
->freeAttrs
!= NULL
) {
1676 xmlAttrPtr cur
, next
;
1678 cur
= ctxt
->freeAttrs
;
1679 while (cur
!= NULL
) {
1686 * cleanup the error strings
1688 if (ctxt
->lastError
.message
!= NULL
)
1689 xmlFree(ctxt
->lastError
.message
);
1690 if (ctxt
->lastError
.file
!= NULL
)
1691 xmlFree(ctxt
->lastError
.file
);
1692 if (ctxt
->lastError
.str1
!= NULL
)
1693 xmlFree(ctxt
->lastError
.str1
);
1694 if (ctxt
->lastError
.str2
!= NULL
)
1695 xmlFree(ctxt
->lastError
.str2
);
1696 if (ctxt
->lastError
.str3
!= NULL
)
1697 xmlFree(ctxt
->lastError
.str3
);
1699 #ifdef LIBXML_CATALOG_ENABLED
1700 if (ctxt
->catalogs
!= NULL
)
1701 xmlCatalogFreeLocal(ctxt
->catalogs
);
1709 * Allocate and initialize a new parser context.
1711 * Returns the xmlParserCtxtPtr or NULL
1715 xmlNewParserCtxt(void)
1717 xmlParserCtxtPtr ctxt
;
1719 ctxt
= (xmlParserCtxtPtr
) xmlMalloc(sizeof(xmlParserCtxt
));
1721 xmlErrMemory(NULL
, "cannot allocate parser context\n");
1724 memset(ctxt
, 0, sizeof(xmlParserCtxt
));
1725 if (xmlInitParserCtxt(ctxt
) < 0) {
1726 xmlFreeParserCtxt(ctxt
);
1732 /************************************************************************
1734 * Handling of node information *
1736 ************************************************************************/
1739 * xmlClearParserCtxt:
1740 * @ctxt: an XML parser context
1742 * Clear (release owned resources) and reinitialize a parser context
1746 xmlClearParserCtxt(xmlParserCtxtPtr ctxt
)
1750 xmlClearNodeInfoSeq(&ctxt
->node_seq
);
1756 * xmlParserFindNodeInfo:
1757 * @ctx: an XML parser context
1758 * @node: an XML node within the tree
1760 * Find the parser node info struct for a given node
1762 * Returns an xmlParserNodeInfo block pointer or NULL
1764 const xmlParserNodeInfo
*
1765 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx
, const xmlNodePtr node
)
1769 if ((ctx
== NULL
) || (node
== NULL
))
1771 /* Find position where node should be at */
1772 pos
= xmlParserFindNodeInfoIndex(&ctx
->node_seq
, node
);
1773 if (pos
< ctx
->node_seq
.length
1774 && ctx
->node_seq
.buffer
[pos
].node
== node
)
1775 return &ctx
->node_seq
.buffer
[pos
];
1782 * xmlInitNodeInfoSeq:
1783 * @seq: a node info sequence pointer
1785 * -- Initialize (set to initial state) node info sequence
1788 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq
)
1798 * xmlClearNodeInfoSeq:
1799 * @seq: a node info sequence pointer
1801 * -- Clear (release memory and reinitialize) node
1805 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq
)
1809 if (seq
->buffer
!= NULL
)
1810 xmlFree(seq
->buffer
);
1811 xmlInitNodeInfoSeq(seq
);
1815 * xmlParserFindNodeInfoIndex:
1816 * @seq: a node info sequence pointer
1817 * @node: an XML node pointer
1820 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1821 * the given node is or should be at in a sorted sequence
1823 * Returns a long indicating the position of the record
1826 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq
,
1827 const xmlNodePtr node
)
1829 unsigned long upper
, lower
, middle
;
1832 if ((seq
== NULL
) || (node
== NULL
))
1833 return ((unsigned long) -1);
1835 /* Do a binary search for the key */
1837 upper
= seq
->length
;
1839 while (lower
<= upper
&& !found
) {
1840 middle
= lower
+ (upper
- lower
) / 2;
1841 if (node
== seq
->buffer
[middle
- 1].node
)
1843 else if (node
< seq
->buffer
[middle
- 1].node
)
1849 /* Return position */
1850 if (middle
== 0 || seq
->buffer
[middle
- 1].node
< node
)
1858 * xmlParserAddNodeInfo:
1859 * @ctxt: an XML parser context
1860 * @info: a node info sequence pointer
1862 * Insert node info record into the sorted sequence
1865 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt
,
1866 const xmlParserNodeInfoPtr info
)
1870 if ((ctxt
== NULL
) || (info
== NULL
)) return;
1872 /* Find pos and check to see if node is already in the sequence */
1873 pos
= xmlParserFindNodeInfoIndex(&ctxt
->node_seq
, (xmlNodePtr
)
1876 if ((pos
< ctxt
->node_seq
.length
) &&
1877 (ctxt
->node_seq
.buffer
!= NULL
) &&
1878 (ctxt
->node_seq
.buffer
[pos
].node
== info
->node
)) {
1879 ctxt
->node_seq
.buffer
[pos
] = *info
;
1882 /* Otherwise, we need to add new node to buffer */
1884 if ((ctxt
->node_seq
.length
+ 1 > ctxt
->node_seq
.maximum
) ||
1885 (ctxt
->node_seq
.buffer
== NULL
)) {
1886 xmlParserNodeInfo
*tmp_buffer
;
1887 unsigned int byte_size
;
1889 if (ctxt
->node_seq
.maximum
== 0)
1890 ctxt
->node_seq
.maximum
= 2;
1891 byte_size
= (sizeof(*ctxt
->node_seq
.buffer
) *
1892 (2 * ctxt
->node_seq
.maximum
));
1894 if (ctxt
->node_seq
.buffer
== NULL
)
1895 tmp_buffer
= (xmlParserNodeInfo
*) xmlMalloc(byte_size
);
1898 (xmlParserNodeInfo
*) xmlRealloc(ctxt
->node_seq
.buffer
,
1901 if (tmp_buffer
== NULL
) {
1902 xmlErrMemory(ctxt
, "failed to allocate buffer\n");
1905 ctxt
->node_seq
.buffer
= tmp_buffer
;
1906 ctxt
->node_seq
.maximum
*= 2;
1909 /* If position is not at end, move elements out of the way */
1910 if (pos
!= ctxt
->node_seq
.length
) {
1913 for (i
= ctxt
->node_seq
.length
; i
> pos
; i
--)
1914 ctxt
->node_seq
.buffer
[i
] = ctxt
->node_seq
.buffer
[i
- 1];
1917 /* Copy element and increase length */
1918 ctxt
->node_seq
.buffer
[pos
] = *info
;
1919 ctxt
->node_seq
.length
++;
1923 /************************************************************************
1925 * Defaults settings *
1927 ************************************************************************/
1929 * xmlPedanticParserDefault:
1932 * Set and return the previous value for enabling pedantic warnings.
1934 * Returns the last value for 0 for no substitution, 1 for substitution.
1938 xmlPedanticParserDefault(int val
) {
1939 int old
= xmlPedanticParserDefaultValue
;
1941 xmlPedanticParserDefaultValue
= val
;
1946 * xmlLineNumbersDefault:
1949 * Set and return the previous value for enabling line numbers in elements
1950 * contents. This may break on old application and is turned off by default.
1952 * Returns the last value for 0 for no substitution, 1 for substitution.
1956 xmlLineNumbersDefault(int val
) {
1957 int old
= xmlLineNumbersDefaultValue
;
1959 xmlLineNumbersDefaultValue
= val
;
1964 * xmlSubstituteEntitiesDefault:
1967 * Set and return the previous value for default entity support.
1968 * Initially the parser always keep entity references instead of substituting
1969 * entity values in the output. This function has to be used to change the
1970 * default parser behavior
1971 * SAX::substituteEntities() has to be used for changing that on a file by
1974 * Returns the last value for 0 for no substitution, 1 for substitution.
1978 xmlSubstituteEntitiesDefault(int val
) {
1979 int old
= xmlSubstituteEntitiesDefaultValue
;
1981 xmlSubstituteEntitiesDefaultValue
= val
;
1986 * xmlKeepBlanksDefault:
1989 * Set and return the previous value for default blanks text nodes support.
1990 * The 1.x version of the parser used an heuristic to try to detect
1991 * ignorable white spaces. As a result the SAX callback was generating
1992 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
1993 * using the DOM output text nodes containing those blanks were not generated.
1994 * The 2.x and later version will switch to the XML standard way and
1995 * ignorableWhitespace() are only generated when running the parser in
1996 * validating mode and when the current element doesn't allow CDATA or
1998 * This function is provided as a way to force the standard behavior
1999 * on 1.X libs and to switch back to the old mode for compatibility when
2000 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2001 * by using xmlIsBlankNode() commodity function to detect the "empty"
2003 * This value also affect autogeneration of indentation when saving code
2004 * if blanks sections are kept, indentation is not generated.
2006 * Returns the last value for 0 for no substitution, 1 for substitution.
2010 xmlKeepBlanksDefault(int val
) {
2011 int old
= xmlKeepBlanksDefaultValue
;
2013 xmlKeepBlanksDefaultValue
= val
;
2014 if (!val
) xmlIndentTreeOutput
= 1;