2 * encoding.c : implements the encoding conversion functions needed for XML
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 character codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
31 #ifdef LIBXML_ICONV_ENABLED
35 #include <libxml/encoding.h>
36 #include <libxml/xmlmemory.h>
37 #ifdef LIBXML_HTML_ENABLED
38 #include <libxml/HTMLparser.h>
40 #include <libxml/globals.h>
41 #include <libxml/xmlerror.h>
46 #ifdef LIBXML_ICU_ENABLED
47 #include <unicode/ucnv.h>
48 /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
49 #define ICU_PIVOT_BUF_SIZE 1024
50 typedef struct _uconv_t uconv_t
;
52 UConverter
*uconv
; /* for conversion between an encoding and UTF-16 */
53 UConverter
*utf8
; /* for conversion between UTF-8 and UTF-16 */
54 UChar pivot_buf
[ICU_PIVOT_BUF_SIZE
];
60 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler
= NULL
;
61 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler
= NULL
;
63 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias
;
64 typedef xmlCharEncodingAlias
*xmlCharEncodingAliasPtr
;
65 struct _xmlCharEncodingAlias
{
70 static xmlCharEncodingAliasPtr xmlCharEncodingAliases
= NULL
;
71 static int xmlCharEncodingAliasesNb
= 0;
72 static int xmlCharEncodingAliasesMax
= 0;
74 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
76 #define DEBUG_ENCODING /* Define this to get encoding traces */
79 #ifdef LIBXML_ISO8859X_ENABLED
80 static void xmlRegisterCharEncodingHandlersISO8859x (void);
84 static int xmlLittleEndian
= 1;
87 * xmlEncodingErrMemory:
88 * @extra: extra information
90 * Handle an out of memory condition
93 xmlEncodingErrMemory(const char *extra
)
95 __xmlSimpleError(XML_FROM_I18N
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
100 * @error: the error number
101 * @msg: the error message
105 static void LIBXML_ATTR_FORMAT(2,0)
106 xmlEncodingErr(xmlParserErrors error
, const char *msg
, const char *val
)
108 __xmlRaiseError(NULL
, NULL
, NULL
, NULL
, NULL
,
109 XML_FROM_I18N
, error
, XML_ERR_FATAL
,
110 NULL
, 0, val
, NULL
, NULL
, 0, 0, msg
, val
);
113 #ifdef LIBXML_ICU_ENABLED
115 openIcuConverter(const char* name
, int toUnicode
)
117 UErrorCode status
= U_ZERO_ERROR
;
118 uconv_t
*conv
= (uconv_t
*) xmlMalloc(sizeof(uconv_t
));
122 conv
->pivot_source
= conv
->pivot_buf
;
123 conv
->pivot_target
= conv
->pivot_buf
;
125 conv
->uconv
= ucnv_open(name
, &status
);
126 if (U_FAILURE(status
))
129 status
= U_ZERO_ERROR
;
131 ucnv_setToUCallBack(conv
->uconv
, UCNV_TO_U_CALLBACK_STOP
,
132 NULL
, NULL
, NULL
, &status
);
135 ucnv_setFromUCallBack(conv
->uconv
, UCNV_FROM_U_CALLBACK_STOP
,
136 NULL
, NULL
, NULL
, &status
);
138 if (U_FAILURE(status
))
141 status
= U_ZERO_ERROR
;
142 conv
->utf8
= ucnv_open("UTF-8", &status
);
143 if (U_SUCCESS(status
))
148 ucnv_close(conv
->uconv
);
154 closeIcuConverter(uconv_t
*conv
)
157 ucnv_close(conv
->uconv
);
158 ucnv_close(conv
->utf8
);
162 #endif /* LIBXML_ICU_ENABLED */
164 /************************************************************************
166 * Conversions To/From UTF8 encoding *
168 ************************************************************************/
172 * @out: a pointer to an array of bytes to store the result
173 * @outlen: the length of @out
174 * @in: a pointer to an array of ASCII chars
175 * @inlen: the length of @in
177 * Take a block of ASCII chars in and try to convert it to an UTF-8
178 * block of chars out.
179 * Returns 0 if success, or -1 otherwise
180 * The value of @inlen after return is the number of octets consumed
181 * if the return value is positive, else unpredictable.
182 * The value of @outlen after return is the number of octets produced.
185 asciiToUTF8(unsigned char* out
, int *outlen
,
186 const unsigned char* in
, int *inlen
) {
187 unsigned char* outstart
= out
;
188 const unsigned char* base
= in
;
189 const unsigned char* processed
= in
;
190 unsigned char* outend
= out
+ *outlen
;
191 const unsigned char* inend
;
194 inend
= in
+ (*inlen
);
195 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
203 *outlen
= out
- outstart
;
204 *inlen
= processed
- base
;
208 processed
= (const unsigned char*) in
;
210 *outlen
= out
- outstart
;
211 *inlen
= processed
- base
;
215 #ifdef LIBXML_OUTPUT_ENABLED
218 * @out: a pointer to an array of bytes to store the result
219 * @outlen: the length of @out
220 * @in: a pointer to an array of UTF-8 chars
221 * @inlen: the length of @in
223 * Take a block of UTF-8 chars in and try to convert it to an ASCII
224 * block of chars out.
226 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
227 * The value of @inlen after return is the number of octets consumed
228 * if the return value is positive, else unpredictable.
229 * The value of @outlen after return is the number of octets produced.
232 UTF8Toascii(unsigned char* out
, int *outlen
,
233 const unsigned char* in
, int *inlen
) {
234 const unsigned char* processed
= in
;
235 const unsigned char* outend
;
236 const unsigned char* outstart
= out
;
237 const unsigned char* instart
= in
;
238 const unsigned char* inend
;
242 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
245 * initialization nothing to do
251 inend
= in
+ (*inlen
);
252 outend
= out
+ (*outlen
);
255 if (d
< 0x80) { c
= d
; trailing
= 0; }
257 /* trailing byte in leading position */
258 *outlen
= out
- outstart
;
259 *inlen
= processed
- instart
;
261 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
262 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
263 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
265 /* no chance for this in Ascii */
266 *outlen
= out
- outstart
;
267 *inlen
= processed
- instart
;
271 if (inend
- in
< trailing
) {
275 for ( ; trailing
; trailing
--) {
276 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80))
282 /* assertion: c is a single UCS-4 value */
288 /* no chance for this in Ascii */
289 *outlen
= out
- outstart
;
290 *inlen
= processed
- instart
;
295 *outlen
= out
- outstart
;
296 *inlen
= processed
- instart
;
299 #endif /* LIBXML_OUTPUT_ENABLED */
303 * @out: a pointer to an array of bytes to store the result
304 * @outlen: the length of @out
305 * @in: a pointer to an array of ISO Latin 1 chars
306 * @inlen: the length of @in
308 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
309 * block of chars out.
310 * Returns the number of bytes written if success, or -1 otherwise
311 * The value of @inlen after return is the number of octets consumed
312 * if the return value is positive, else unpredictable.
313 * The value of @outlen after return is the number of octets produced.
316 isolat1ToUTF8(unsigned char* out
, int *outlen
,
317 const unsigned char* in
, int *inlen
) {
318 unsigned char* outstart
= out
;
319 const unsigned char* base
= in
;
320 unsigned char* outend
;
321 const unsigned char* inend
;
322 const unsigned char* instop
;
324 if ((out
== NULL
) || (in
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
))
327 outend
= out
+ *outlen
;
328 inend
= in
+ (*inlen
);
331 while ((in
< inend
) && (out
< outend
- 1)) {
333 *out
++ = (((*in
) >> 6) & 0x1F) | 0xC0;
334 *out
++ = ((*in
) & 0x3F) | 0x80;
337 if ((instop
- in
) > (outend
- out
)) instop
= in
+ (outend
- out
);
338 while ((in
< instop
) && (*in
< 0x80)) {
342 if ((in
< inend
) && (out
< outend
) && (*in
< 0x80)) {
345 *outlen
= out
- outstart
;
352 * @out: a pointer to an array of bytes to store the result
353 * @outlen: the length of @out
354 * @inb: a pointer to an array of UTF-8 chars
355 * @inlenb: the length of @in in UTF-8 chars
357 * No op copy operation for UTF8 handling.
359 * Returns the number of bytes written, or -1 if lack of space.
360 * The value of *inlenb after return is the number of octets consumed
361 * if the return value is positive, else unpredictable.
364 UTF8ToUTF8(unsigned char* out
, int *outlen
,
365 const unsigned char* inb
, int *inlenb
)
369 if ((out
== NULL
) || (outlen
== NULL
) || (inlenb
== NULL
))
372 /* inb == NULL means output is initialized. */
377 if (*outlen
> *inlenb
) {
386 * FIXME: Conversion functions must assure valid UTF-8, so we have
387 * to check for UTF-8 validity. Preferably, this converter shouldn't
390 memcpy(out
, inb
, len
);
398 #ifdef LIBXML_OUTPUT_ENABLED
401 * @out: a pointer to an array of bytes to store the result
402 * @outlen: the length of @out
403 * @in: a pointer to an array of UTF-8 chars
404 * @inlen: the length of @in
406 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
407 * block of chars out.
409 * Returns the number of bytes written if success, -2 if the transcoding fails,
411 * The value of @inlen after return is the number of octets consumed
412 * if the return value is positive, else unpredictable.
413 * The value of @outlen after return is the number of octets produced.
416 UTF8Toisolat1(unsigned char* out
, int *outlen
,
417 const unsigned char* in
, int *inlen
) {
418 const unsigned char* processed
= in
;
419 const unsigned char* outend
;
420 const unsigned char* outstart
= out
;
421 const unsigned char* instart
= in
;
422 const unsigned char* inend
;
426 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
429 * initialization nothing to do
435 inend
= in
+ (*inlen
);
436 outend
= out
+ (*outlen
);
439 if (d
< 0x80) { c
= d
; trailing
= 0; }
441 /* trailing byte in leading position */
442 *outlen
= out
- outstart
;
443 *inlen
= processed
- instart
;
445 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
446 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
447 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
449 /* no chance for this in IsoLat1 */
450 *outlen
= out
- outstart
;
451 *inlen
= processed
- instart
;
455 if (inend
- in
< trailing
) {
459 for ( ; trailing
; trailing
--) {
462 if (((d
= *in
++) & 0xC0) != 0x80) {
463 *outlen
= out
- outstart
;
464 *inlen
= processed
- instart
;
471 /* assertion: c is a single UCS-4 value */
477 /* no chance for this in IsoLat1 */
478 *outlen
= out
- outstart
;
479 *inlen
= processed
- instart
;
484 *outlen
= out
- outstart
;
485 *inlen
= processed
- instart
;
488 #endif /* LIBXML_OUTPUT_ENABLED */
492 * @out: a pointer to an array of bytes to store the result
493 * @outlen: the length of @out
494 * @inb: a pointer to an array of UTF-16LE passed as a byte array
495 * @inlenb: the length of @inb in bytes
497 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
498 * block of chars out. This function assumes the endian property
499 * is the same between the native type of this machine and the
502 * Returns the number of bytes written, or -1 if lack of space, or -2
503 * if the transcoding fails (if *in is not a valid utf16 string)
504 * The value of *inlenb after return is the number of octets consumed
505 * if the return value is positive, else unpredictable.
508 UTF16LEToUTF8(unsigned char* out
, int *outlen
,
509 const unsigned char* inb
, int *inlenb
)
511 unsigned char* outstart
= out
;
512 const unsigned char* processed
= inb
;
513 unsigned char* outend
;
514 unsigned short* in
= (unsigned short*) inb
;
515 unsigned short* inend
;
516 unsigned int c
, d
, inlen
;
524 outend
= out
+ *outlen
;
525 if ((*inlenb
% 2) == 1)
529 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
530 if (xmlLittleEndian
) {
533 tmp
= (unsigned char *) in
;
535 c
= c
| (((unsigned int)*tmp
) << 8);
538 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
539 if (in
>= inend
) { /* handle split mutli-byte characters */
542 if (xmlLittleEndian
) {
545 tmp
= (unsigned char *) in
;
547 d
= d
| (((unsigned int)*tmp
) << 8);
550 if ((d
& 0xFC00) == 0xDC00) {
557 *outlen
= out
- outstart
;
558 *inlenb
= processed
- inb
;
563 /* assertion: c is a single UCS-4 value */
566 if (c
< 0x80) { *out
++= c
; bits
= -6; }
567 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
568 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
569 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
571 for ( ; bits
>= 0; bits
-= 6) {
574 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
576 processed
= (const unsigned char*) in
;
578 *outlen
= out
- outstart
;
579 *inlenb
= processed
- inb
;
583 #ifdef LIBXML_OUTPUT_ENABLED
586 * @outb: a pointer to an array of bytes to store the result
587 * @outlen: the length of @outb
588 * @in: a pointer to an array of UTF-8 chars
589 * @inlen: the length of @in
591 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
592 * block of chars out.
594 * Returns the number of bytes written, or -1 if lack of space, or -2
595 * if the transcoding failed.
598 UTF8ToUTF16LE(unsigned char* outb
, int *outlen
,
599 const unsigned char* in
, int *inlen
)
601 unsigned short* out
= (unsigned short*) outb
;
602 const unsigned char* processed
= in
;
603 const unsigned char *const instart
= in
;
604 unsigned short* outstart
= out
;
605 unsigned short* outend
;
606 const unsigned char* inend
;
610 unsigned short tmp1
, tmp2
;
612 /* UTF16LE encoding has no BOM */
613 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
620 outend
= out
+ (*outlen
/ 2);
623 if (d
< 0x80) { c
= d
; trailing
= 0; }
625 /* trailing byte in leading position */
626 *outlen
= (out
- outstart
) * 2;
627 *inlen
= processed
- instart
;
629 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
630 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
631 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
633 /* no chance for this in UTF-16 */
634 *outlen
= (out
- outstart
) * 2;
635 *inlen
= processed
- instart
;
639 if (inend
- in
< trailing
) {
643 for ( ; trailing
; trailing
--) {
644 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80))
650 /* assertion: c is a single UCS-4 value */
654 if (xmlLittleEndian
) {
657 tmp
= (unsigned char *) out
;
659 *(tmp
+ 1) = c
>> 8 ;
663 else if (c
< 0x110000) {
667 if (xmlLittleEndian
) {
668 *out
++ = 0xD800 | (c
>> 10);
669 *out
++ = 0xDC00 | (c
& 0x03FF);
671 tmp1
= 0xD800 | (c
>> 10);
672 tmp
= (unsigned char *) out
;
673 *tmp
= (unsigned char) tmp1
;
674 *(tmp
+ 1) = tmp1
>> 8;
677 tmp2
= 0xDC00 | (c
& 0x03FF);
678 tmp
= (unsigned char *) out
;
679 *tmp
= (unsigned char) tmp2
;
680 *(tmp
+ 1) = tmp2
>> 8;
688 *outlen
= (out
- outstart
) * 2;
689 *inlen
= processed
- instart
;
695 * @outb: a pointer to an array of bytes to store the result
696 * @outlen: the length of @outb
697 * @in: a pointer to an array of UTF-8 chars
698 * @inlen: the length of @in
700 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
701 * block of chars out.
703 * Returns the number of bytes written, or -1 if lack of space, or -2
704 * if the transcoding failed.
707 UTF8ToUTF16(unsigned char* outb
, int *outlen
,
708 const unsigned char* in
, int *inlen
)
712 * initialization, add the Byte Order Mark for UTF-16LE
719 #ifdef DEBUG_ENCODING
720 xmlGenericError(xmlGenericErrorContext
,
721 "Added FFFE Byte Order Mark\n");
729 return (UTF8ToUTF16LE(outb
, outlen
, in
, inlen
));
731 #endif /* LIBXML_OUTPUT_ENABLED */
735 * @out: a pointer to an array of bytes to store the result
736 * @outlen: the length of @out
737 * @inb: a pointer to an array of UTF-16 passed as a byte array
738 * @inlenb: the length of @inb in bytes
740 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
741 * block of chars out. This function assumes the endian property
742 * is the same between the native type of this machine and the
745 * Returns the number of bytes written, or -1 if lack of space, or -2
746 * if the transcoding fails (if *in is not a valid utf16 string)
747 * The value of *inlenb after return is the number of octets consumed
748 * if the return value is positive, else unpredictable.
751 UTF16BEToUTF8(unsigned char* out
, int *outlen
,
752 const unsigned char* inb
, int *inlenb
)
754 unsigned char* outstart
= out
;
755 const unsigned char* processed
= inb
;
756 unsigned char* outend
;
757 unsigned short* in
= (unsigned short*) inb
;
758 unsigned short* inend
;
759 unsigned int c
, d
, inlen
;
767 outend
= out
+ *outlen
;
768 if ((*inlenb
% 2) == 1)
772 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
773 if (xmlLittleEndian
) {
774 tmp
= (unsigned char *) in
;
776 c
= (c
<< 8) | (unsigned int) *tmp
;
781 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
782 if (in
>= inend
) { /* handle split mutli-byte characters */
785 if (xmlLittleEndian
) {
786 tmp
= (unsigned char *) in
;
788 d
= (d
<< 8) | (unsigned int) *tmp
;
793 if ((d
& 0xFC00) == 0xDC00) {
800 *outlen
= out
- outstart
;
801 *inlenb
= processed
- inb
;
806 /* assertion: c is a single UCS-4 value */
809 if (c
< 0x80) { *out
++= c
; bits
= -6; }
810 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
811 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
812 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
814 for ( ; bits
>= 0; bits
-= 6) {
817 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
819 processed
= (const unsigned char*) in
;
821 *outlen
= out
- outstart
;
822 *inlenb
= processed
- inb
;
826 #ifdef LIBXML_OUTPUT_ENABLED
829 * @outb: a pointer to an array of bytes to store the result
830 * @outlen: the length of @outb
831 * @in: a pointer to an array of UTF-8 chars
832 * @inlen: the length of @in
834 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
835 * block of chars out.
837 * Returns the number of bytes written, or -1 if lack of space, or -2
838 * if the transcoding failed.
841 UTF8ToUTF16BE(unsigned char* outb
, int *outlen
,
842 const unsigned char* in
, int *inlen
)
844 unsigned short* out
= (unsigned short*) outb
;
845 const unsigned char* processed
= in
;
846 const unsigned char *const instart
= in
;
847 unsigned short* outstart
= out
;
848 unsigned short* outend
;
849 const unsigned char* inend
;
853 unsigned short tmp1
, tmp2
;
855 /* UTF-16BE has no BOM */
856 if ((outb
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
863 outend
= out
+ (*outlen
/ 2);
866 if (d
< 0x80) { c
= d
; trailing
= 0; }
868 /* trailing byte in leading position */
869 *outlen
= out
- outstart
;
870 *inlen
= processed
- instart
;
872 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
873 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
874 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
876 /* no chance for this in UTF-16 */
877 *outlen
= out
- outstart
;
878 *inlen
= processed
- instart
;
882 if (inend
- in
< trailing
) {
886 for ( ; trailing
; trailing
--) {
887 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80)) break;
892 /* assertion: c is a single UCS-4 value */
894 if (out
>= outend
) break;
895 if (xmlLittleEndian
) {
896 tmp
= (unsigned char *) out
;
904 else if (c
< 0x110000) {
905 if (out
+1 >= outend
) break;
907 if (xmlLittleEndian
) {
908 tmp1
= 0xD800 | (c
>> 10);
909 tmp
= (unsigned char *) out
;
911 *(tmp
+ 1) = (unsigned char) tmp1
;
914 tmp2
= 0xDC00 | (c
& 0x03FF);
915 tmp
= (unsigned char *) out
;
917 *(tmp
+ 1) = (unsigned char) tmp2
;
920 *out
++ = 0xD800 | (c
>> 10);
921 *out
++ = 0xDC00 | (c
& 0x03FF);
928 *outlen
= (out
- outstart
) * 2;
929 *inlen
= processed
- instart
;
932 #endif /* LIBXML_OUTPUT_ENABLED */
934 /************************************************************************
936 * Generic encoding handling routines *
938 ************************************************************************/
941 * xmlDetectCharEncoding:
942 * @in: a pointer to the first bytes of the XML entity, must be at least
943 * 2 bytes long (at least 4 if encoding is a UCS-4 variant).
944 * @len: the length of the buffer
946 * Guess the encoding of the entity using the first bytes of the entity content
947 * according to the non-normative appendix F of the XML-1.0 recommendation.
949 * Returns one of the XML_CHAR_ENCODING_... values.
952 xmlDetectCharEncoding(const unsigned char* in
, int len
)
955 return(XML_CHAR_ENCODING_NONE
);
957 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
958 (in
[2] == 0x00) && (in
[3] == 0x3C))
959 return(XML_CHAR_ENCODING_UCS4BE
);
960 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
961 (in
[2] == 0x00) && (in
[3] == 0x00))
962 return(XML_CHAR_ENCODING_UCS4LE
);
963 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
964 (in
[2] == 0x3C) && (in
[3] == 0x00))
965 return(XML_CHAR_ENCODING_UCS4_2143
);
966 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
967 (in
[2] == 0x00) && (in
[3] == 0x00))
968 return(XML_CHAR_ENCODING_UCS4_3412
);
969 if ((in
[0] == 0x4C) && (in
[1] == 0x6F) &&
970 (in
[2] == 0xA7) && (in
[3] == 0x94))
971 return(XML_CHAR_ENCODING_EBCDIC
);
972 if ((in
[0] == 0x3C) && (in
[1] == 0x3F) &&
973 (in
[2] == 0x78) && (in
[3] == 0x6D))
974 return(XML_CHAR_ENCODING_UTF8
);
976 * Although not part of the recommendation, we also
977 * attempt an "auto-recognition" of UTF-16LE and
978 * UTF-16BE encodings.
980 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
981 (in
[2] == 0x3F) && (in
[3] == 0x00))
982 return(XML_CHAR_ENCODING_UTF16LE
);
983 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
984 (in
[2] == 0x00) && (in
[3] == 0x3F))
985 return(XML_CHAR_ENCODING_UTF16BE
);
989 * Errata on XML-1.0 June 20 2001
990 * We now allow an UTF8 encoded BOM
992 if ((in
[0] == 0xEF) && (in
[1] == 0xBB) &&
994 return(XML_CHAR_ENCODING_UTF8
);
996 /* For UTF-16 we can recognize by the BOM */
998 if ((in
[0] == 0xFE) && (in
[1] == 0xFF))
999 return(XML_CHAR_ENCODING_UTF16BE
);
1000 if ((in
[0] == 0xFF) && (in
[1] == 0xFE))
1001 return(XML_CHAR_ENCODING_UTF16LE
);
1003 return(XML_CHAR_ENCODING_NONE
);
1007 * xmlCleanupEncodingAliases:
1009 * Unregisters all aliases
1012 xmlCleanupEncodingAliases(void) {
1015 if (xmlCharEncodingAliases
== NULL
)
1018 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1019 if (xmlCharEncodingAliases
[i
].name
!= NULL
)
1020 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1021 if (xmlCharEncodingAliases
[i
].alias
!= NULL
)
1022 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
1024 xmlCharEncodingAliasesNb
= 0;
1025 xmlCharEncodingAliasesMax
= 0;
1026 xmlFree(xmlCharEncodingAliases
);
1027 xmlCharEncodingAliases
= NULL
;
1031 * xmlGetEncodingAlias:
1032 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1034 * Lookup an encoding name for the given alias.
1036 * Returns NULL if not found, otherwise the original name
1039 xmlGetEncodingAlias(const char *alias
) {
1046 if (xmlCharEncodingAliases
== NULL
)
1049 for (i
= 0;i
< 99;i
++) {
1050 upper
[i
] = toupper(alias
[i
]);
1051 if (upper
[i
] == 0) break;
1056 * Walk down the list looking for a definition of the alias
1058 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1059 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1060 return(xmlCharEncodingAliases
[i
].name
);
1067 * xmlAddEncodingAlias:
1068 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1069 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1071 * Registers an alias @alias for an encoding named @name. Existing alias
1072 * will be overwritten.
1074 * Returns 0 in case of success, -1 in case of error
1077 xmlAddEncodingAlias(const char *name
, const char *alias
) {
1081 if ((name
== NULL
) || (alias
== NULL
))
1084 for (i
= 0;i
< 99;i
++) {
1085 upper
[i
] = toupper(alias
[i
]);
1086 if (upper
[i
] == 0) break;
1090 if (xmlCharEncodingAliases
== NULL
) {
1091 xmlCharEncodingAliasesNb
= 0;
1092 xmlCharEncodingAliasesMax
= 20;
1093 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1094 xmlMalloc(xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1095 if (xmlCharEncodingAliases
== NULL
)
1097 } else if (xmlCharEncodingAliasesNb
>= xmlCharEncodingAliasesMax
) {
1098 xmlCharEncodingAliasesMax
*= 2;
1099 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1100 xmlRealloc(xmlCharEncodingAliases
,
1101 xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1104 * Walk down the list looking for a definition of the alias
1106 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1107 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1109 * Replace the definition.
1111 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1112 xmlCharEncodingAliases
[i
].name
= xmlMemStrdup(name
);
1117 * Add the definition
1119 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].name
= xmlMemStrdup(name
);
1120 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].alias
= xmlMemStrdup(upper
);
1121 xmlCharEncodingAliasesNb
++;
1126 * xmlDelEncodingAlias:
1127 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1129 * Unregisters an encoding alias @alias
1131 * Returns 0 in case of success, -1 in case of error
1134 xmlDelEncodingAlias(const char *alias
) {
1140 if (xmlCharEncodingAliases
== NULL
)
1143 * Walk down the list looking for a definition of the alias
1145 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1146 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, alias
)) {
1147 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1148 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
1149 xmlCharEncodingAliasesNb
--;
1150 memmove(&xmlCharEncodingAliases
[i
], &xmlCharEncodingAliases
[i
+ 1],
1151 sizeof(xmlCharEncodingAlias
) * (xmlCharEncodingAliasesNb
- i
));
1159 * xmlParseCharEncoding:
1160 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1162 * Compare the string to the encoding schemes already known. Note
1163 * that the comparison is case insensitive according to section
1164 * [XML] 4.3.3 Character Encoding in Entities.
1166 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1167 * if not recognized.
1170 xmlParseCharEncoding(const char* name
)
1177 return(XML_CHAR_ENCODING_NONE
);
1180 * Do the alias resolution
1182 alias
= xmlGetEncodingAlias(name
);
1186 for (i
= 0;i
< 499;i
++) {
1187 upper
[i
] = toupper(name
[i
]);
1188 if (upper
[i
] == 0) break;
1192 if (!strcmp(upper
, "")) return(XML_CHAR_ENCODING_NONE
);
1193 if (!strcmp(upper
, "UTF-8")) return(XML_CHAR_ENCODING_UTF8
);
1194 if (!strcmp(upper
, "UTF8")) return(XML_CHAR_ENCODING_UTF8
);
1197 * NOTE: if we were able to parse this, the endianness of UTF16 is
1198 * already found and in use
1200 if (!strcmp(upper
, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE
);
1201 if (!strcmp(upper
, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE
);
1203 if (!strcmp(upper
, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1204 if (!strcmp(upper
, "UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1205 if (!strcmp(upper
, "UCS2")) return(XML_CHAR_ENCODING_UCS2
);
1208 * NOTE: if we were able to parse this, the endianness of UCS4 is
1209 * already found and in use
1211 if (!strcmp(upper
, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1212 if (!strcmp(upper
, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1213 if (!strcmp(upper
, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE
);
1216 if (!strcmp(upper
, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1
);
1217 if (!strcmp(upper
, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1
);
1218 if (!strcmp(upper
, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1
);
1220 if (!strcmp(upper
, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2
);
1221 if (!strcmp(upper
, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2
);
1222 if (!strcmp(upper
, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2
);
1224 if (!strcmp(upper
, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3
);
1225 if (!strcmp(upper
, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4
);
1226 if (!strcmp(upper
, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5
);
1227 if (!strcmp(upper
, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6
);
1228 if (!strcmp(upper
, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7
);
1229 if (!strcmp(upper
, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8
);
1230 if (!strcmp(upper
, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9
);
1232 if (!strcmp(upper
, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP
);
1233 if (!strcmp(upper
, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS
);
1234 if (!strcmp(upper
, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP
);
1236 #ifdef DEBUG_ENCODING
1237 xmlGenericError(xmlGenericErrorContext
, "Unknown encoding %s\n", name
);
1239 return(XML_CHAR_ENCODING_ERROR
);
1243 * xmlGetCharEncodingName:
1244 * @enc: the encoding
1246 * The "canonical" name for XML encoding.
1247 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1248 * Section 4.3.3 Character Encoding in Entities
1250 * Returns the canonical name for the given encoding
1254 xmlGetCharEncodingName(xmlCharEncoding enc
) {
1256 case XML_CHAR_ENCODING_ERROR
:
1258 case XML_CHAR_ENCODING_NONE
:
1260 case XML_CHAR_ENCODING_UTF8
:
1262 case XML_CHAR_ENCODING_UTF16LE
:
1264 case XML_CHAR_ENCODING_UTF16BE
:
1266 case XML_CHAR_ENCODING_EBCDIC
:
1268 case XML_CHAR_ENCODING_UCS4LE
:
1269 return("ISO-10646-UCS-4");
1270 case XML_CHAR_ENCODING_UCS4BE
:
1271 return("ISO-10646-UCS-4");
1272 case XML_CHAR_ENCODING_UCS4_2143
:
1273 return("ISO-10646-UCS-4");
1274 case XML_CHAR_ENCODING_UCS4_3412
:
1275 return("ISO-10646-UCS-4");
1276 case XML_CHAR_ENCODING_UCS2
:
1277 return("ISO-10646-UCS-2");
1278 case XML_CHAR_ENCODING_8859_1
:
1279 return("ISO-8859-1");
1280 case XML_CHAR_ENCODING_8859_2
:
1281 return("ISO-8859-2");
1282 case XML_CHAR_ENCODING_8859_3
:
1283 return("ISO-8859-3");
1284 case XML_CHAR_ENCODING_8859_4
:
1285 return("ISO-8859-4");
1286 case XML_CHAR_ENCODING_8859_5
:
1287 return("ISO-8859-5");
1288 case XML_CHAR_ENCODING_8859_6
:
1289 return("ISO-8859-6");
1290 case XML_CHAR_ENCODING_8859_7
:
1291 return("ISO-8859-7");
1292 case XML_CHAR_ENCODING_8859_8
:
1293 return("ISO-8859-8");
1294 case XML_CHAR_ENCODING_8859_9
:
1295 return("ISO-8859-9");
1296 case XML_CHAR_ENCODING_2022_JP
:
1297 return("ISO-2022-JP");
1298 case XML_CHAR_ENCODING_SHIFT_JIS
:
1299 return("Shift-JIS");
1300 case XML_CHAR_ENCODING_EUC_JP
:
1302 case XML_CHAR_ENCODING_ASCII
:
1308 /************************************************************************
1310 * Char encoding handlers *
1312 ************************************************************************/
1315 /* the size should be growable, but it's not a big deal ... */
1316 #define MAX_ENCODING_HANDLERS 50
1317 static xmlCharEncodingHandlerPtr
*handlers
= NULL
;
1318 static int nbCharEncodingHandler
= 0;
1321 * The default is UTF-8 for XML, that's also the default used for the
1322 * parser internals, so the default encoding handler is NULL
1325 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler
= NULL
;
1328 * xmlNewCharEncodingHandler:
1329 * @name: the encoding name, in UTF-8 format (ASCII actually)
1330 * @input: the xmlCharEncodingInputFunc to read that encoding
1331 * @output: the xmlCharEncodingOutputFunc to write that encoding
1333 * Creates and registers an xmlCharEncodingHandler.
1335 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1337 xmlCharEncodingHandlerPtr
1338 xmlNewCharEncodingHandler(const char *name
,
1339 xmlCharEncodingInputFunc input
,
1340 xmlCharEncodingOutputFunc output
) {
1341 xmlCharEncodingHandlerPtr handler
;
1348 * Do the alias resolution
1350 alias
= xmlGetEncodingAlias(name
);
1355 * Keep only the uppercase version of the encoding.
1358 xmlEncodingErr(XML_I18N_NO_NAME
,
1359 "xmlNewCharEncodingHandler : no name !\n", NULL
);
1362 for (i
= 0;i
< 499;i
++) {
1363 upper
[i
] = toupper(name
[i
]);
1364 if (upper
[i
] == 0) break;
1367 up
= xmlMemStrdup(upper
);
1369 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1374 * allocate and fill-up an handler block.
1376 handler
= (xmlCharEncodingHandlerPtr
)
1377 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1378 if (handler
== NULL
) {
1380 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1383 memset(handler
, 0, sizeof(xmlCharEncodingHandler
));
1384 handler
->input
= input
;
1385 handler
->output
= output
;
1388 #ifdef LIBXML_ICONV_ENABLED
1389 handler
->iconv_in
= NULL
;
1390 handler
->iconv_out
= NULL
;
1392 #ifdef LIBXML_ICU_ENABLED
1393 handler
->uconv_in
= NULL
;
1394 handler
->uconv_out
= NULL
;
1398 * registers and returns the handler.
1400 xmlRegisterCharEncodingHandler(handler
);
1401 #ifdef DEBUG_ENCODING
1402 xmlGenericError(xmlGenericErrorContext
,
1403 "Registered encoding handler for %s\n", name
);
1409 * xmlInitCharEncodingHandlers:
1411 * DEPRECATED: This function will be made private. Call xmlInitParser to
1412 * initialize the library.
1414 * Initialize the char encoding support, it registers the default
1415 * encoding supported.
1416 * NOTE: while public, this function usually doesn't need to be called
1417 * in normal processing.
1420 xmlInitCharEncodingHandlers(void) {
1421 unsigned short int tst
= 0x1234;
1422 unsigned char *ptr
= (unsigned char *) &tst
;
1424 if (handlers
!= NULL
) return;
1426 handlers
= (xmlCharEncodingHandlerPtr
*)
1427 xmlMalloc(MAX_ENCODING_HANDLERS
* sizeof(xmlCharEncodingHandlerPtr
));
1429 if (*ptr
== 0x12) xmlLittleEndian
= 0;
1430 else if (*ptr
== 0x34) xmlLittleEndian
= 1;
1432 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1433 "Odd problem at endianness detection\n", NULL
);
1436 if (handlers
== NULL
) {
1437 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1440 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8
, UTF8ToUTF8
);
1441 #ifdef LIBXML_OUTPUT_ENABLED
1443 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8
, UTF8ToUTF16LE
);
1445 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8
, UTF8ToUTF16BE
);
1446 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8
, UTF8ToUTF16
);
1447 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8
, UTF8Toisolat1
);
1448 xmlNewCharEncodingHandler("ASCII", asciiToUTF8
, UTF8Toascii
);
1449 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8
, UTF8Toascii
);
1450 #ifdef LIBXML_HTML_ENABLED
1451 xmlNewCharEncodingHandler("HTML", NULL
, UTF8ToHtml
);
1455 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8
, NULL
);
1457 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8
, NULL
);
1458 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8
, NULL
);
1459 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8
, NULL
);
1460 xmlNewCharEncodingHandler("ASCII", asciiToUTF8
, NULL
);
1461 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8
, NULL
);
1462 #endif /* LIBXML_OUTPUT_ENABLED */
1463 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464 #ifdef LIBXML_ISO8859X_ENABLED
1465 xmlRegisterCharEncodingHandlersISO8859x ();
1472 * xmlCleanupCharEncodingHandlers:
1474 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1475 * to free global state but see the warnings there. xmlCleanupParser
1476 * should be only called once at program exit. In most cases, you don't
1477 * have call cleanup functions at all.
1479 * Cleanup the memory allocated for the char encoding support, it
1480 * unregisters all the encoding handlers and the aliases.
1483 xmlCleanupCharEncodingHandlers(void) {
1484 xmlCleanupEncodingAliases();
1486 if (handlers
== NULL
) return;
1488 for (;nbCharEncodingHandler
> 0;) {
1489 nbCharEncodingHandler
--;
1490 if (handlers
[nbCharEncodingHandler
] != NULL
) {
1491 if (handlers
[nbCharEncodingHandler
]->name
!= NULL
)
1492 xmlFree(handlers
[nbCharEncodingHandler
]->name
);
1493 xmlFree(handlers
[nbCharEncodingHandler
]);
1498 nbCharEncodingHandler
= 0;
1499 xmlDefaultCharEncodingHandler
= NULL
;
1503 * xmlRegisterCharEncodingHandler:
1504 * @handler: the xmlCharEncodingHandlerPtr handler block
1506 * Register the char encoding handler, surprising, isn't it ?
1509 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler
) {
1510 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1511 if ((handler
== NULL
) || (handlers
== NULL
)) {
1512 xmlEncodingErr(XML_I18N_NO_HANDLER
,
1513 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL
);
1517 if (nbCharEncodingHandler
>= MAX_ENCODING_HANDLERS
) {
1518 xmlEncodingErr(XML_I18N_EXCESS_HANDLER
,
1519 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520 "MAX_ENCODING_HANDLERS");
1523 handlers
[nbCharEncodingHandler
++] = handler
;
1527 if (handler
!= NULL
) {
1528 if (handler
->name
!= NULL
) {
1529 xmlFree(handler
->name
);
1536 * xmlGetCharEncodingHandler:
1537 * @enc: an xmlCharEncoding value.
1539 * Search in the registered set the handler able to read/write that encoding.
1541 * Returns the handler or NULL if not found
1543 xmlCharEncodingHandlerPtr
1544 xmlGetCharEncodingHandler(xmlCharEncoding enc
) {
1545 xmlCharEncodingHandlerPtr handler
;
1547 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1549 case XML_CHAR_ENCODING_ERROR
:
1551 case XML_CHAR_ENCODING_NONE
:
1553 case XML_CHAR_ENCODING_UTF8
:
1555 case XML_CHAR_ENCODING_UTF16LE
:
1556 return(xmlUTF16LEHandler
);
1557 case XML_CHAR_ENCODING_UTF16BE
:
1558 return(xmlUTF16BEHandler
);
1559 case XML_CHAR_ENCODING_EBCDIC
:
1560 handler
= xmlFindCharEncodingHandler("EBCDIC");
1561 if (handler
!= NULL
) return(handler
);
1562 handler
= xmlFindCharEncodingHandler("ebcdic");
1563 if (handler
!= NULL
) return(handler
);
1564 handler
= xmlFindCharEncodingHandler("EBCDIC-US");
1565 if (handler
!= NULL
) return(handler
);
1566 handler
= xmlFindCharEncodingHandler("IBM-037");
1567 if (handler
!= NULL
) return(handler
);
1569 case XML_CHAR_ENCODING_UCS4BE
:
1570 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571 if (handler
!= NULL
) return(handler
);
1572 handler
= xmlFindCharEncodingHandler("UCS-4");
1573 if (handler
!= NULL
) return(handler
);
1574 handler
= xmlFindCharEncodingHandler("UCS4");
1575 if (handler
!= NULL
) return(handler
);
1577 case XML_CHAR_ENCODING_UCS4LE
:
1578 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579 if (handler
!= NULL
) return(handler
);
1580 handler
= xmlFindCharEncodingHandler("UCS-4");
1581 if (handler
!= NULL
) return(handler
);
1582 handler
= xmlFindCharEncodingHandler("UCS4");
1583 if (handler
!= NULL
) return(handler
);
1585 case XML_CHAR_ENCODING_UCS4_2143
:
1587 case XML_CHAR_ENCODING_UCS4_3412
:
1589 case XML_CHAR_ENCODING_UCS2
:
1590 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591 if (handler
!= NULL
) return(handler
);
1592 handler
= xmlFindCharEncodingHandler("UCS-2");
1593 if (handler
!= NULL
) return(handler
);
1594 handler
= xmlFindCharEncodingHandler("UCS2");
1595 if (handler
!= NULL
) return(handler
);
1599 * We used to keep ISO Latin encodings native in the
1600 * generated data. This led to so many problems that
1601 * this has been removed. One can still change this
1602 * back by registering no-ops encoders for those
1604 case XML_CHAR_ENCODING_8859_1
:
1605 handler
= xmlFindCharEncodingHandler("ISO-8859-1");
1606 if (handler
!= NULL
) return(handler
);
1608 case XML_CHAR_ENCODING_8859_2
:
1609 handler
= xmlFindCharEncodingHandler("ISO-8859-2");
1610 if (handler
!= NULL
) return(handler
);
1612 case XML_CHAR_ENCODING_8859_3
:
1613 handler
= xmlFindCharEncodingHandler("ISO-8859-3");
1614 if (handler
!= NULL
) return(handler
);
1616 case XML_CHAR_ENCODING_8859_4
:
1617 handler
= xmlFindCharEncodingHandler("ISO-8859-4");
1618 if (handler
!= NULL
) return(handler
);
1620 case XML_CHAR_ENCODING_8859_5
:
1621 handler
= xmlFindCharEncodingHandler("ISO-8859-5");
1622 if (handler
!= NULL
) return(handler
);
1624 case XML_CHAR_ENCODING_8859_6
:
1625 handler
= xmlFindCharEncodingHandler("ISO-8859-6");
1626 if (handler
!= NULL
) return(handler
);
1628 case XML_CHAR_ENCODING_8859_7
:
1629 handler
= xmlFindCharEncodingHandler("ISO-8859-7");
1630 if (handler
!= NULL
) return(handler
);
1632 case XML_CHAR_ENCODING_8859_8
:
1633 handler
= xmlFindCharEncodingHandler("ISO-8859-8");
1634 if (handler
!= NULL
) return(handler
);
1636 case XML_CHAR_ENCODING_8859_9
:
1637 handler
= xmlFindCharEncodingHandler("ISO-8859-9");
1638 if (handler
!= NULL
) return(handler
);
1642 case XML_CHAR_ENCODING_2022_JP
:
1643 handler
= xmlFindCharEncodingHandler("ISO-2022-JP");
1644 if (handler
!= NULL
) return(handler
);
1646 case XML_CHAR_ENCODING_SHIFT_JIS
:
1647 handler
= xmlFindCharEncodingHandler("SHIFT-JIS");
1648 if (handler
!= NULL
) return(handler
);
1649 handler
= xmlFindCharEncodingHandler("SHIFT_JIS");
1650 if (handler
!= NULL
) return(handler
);
1651 handler
= xmlFindCharEncodingHandler("Shift_JIS");
1652 if (handler
!= NULL
) return(handler
);
1654 case XML_CHAR_ENCODING_EUC_JP
:
1655 handler
= xmlFindCharEncodingHandler("EUC-JP");
1656 if (handler
!= NULL
) return(handler
);
1662 #ifdef DEBUG_ENCODING
1663 xmlGenericError(xmlGenericErrorContext
,
1664 "No handler found for encoding %d\n", enc
);
1670 * xmlFindCharEncodingHandler:
1671 * @name: a string describing the char encoding.
1673 * Search in the registered set the handler able to read/write that encoding.
1675 * Returns the handler or NULL if not found
1677 xmlCharEncodingHandlerPtr
1678 xmlFindCharEncodingHandler(const char *name
) {
1681 xmlCharEncoding alias
;
1682 #ifdef LIBXML_ICONV_ENABLED
1683 xmlCharEncodingHandlerPtr enc
;
1684 iconv_t icv_in
, icv_out
;
1685 #endif /* LIBXML_ICONV_ENABLED */
1686 #ifdef LIBXML_ICU_ENABLED
1687 xmlCharEncodingHandlerPtr encu
;
1688 uconv_t
*ucv_in
, *ucv_out
;
1689 #endif /* LIBXML_ICU_ENABLED */
1693 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1694 if (name
== NULL
) return(xmlDefaultCharEncodingHandler
);
1695 if (name
[0] == 0) return(xmlDefaultCharEncodingHandler
);
1698 * Do the alias resolution
1701 nalias
= xmlGetEncodingAlias(name
);
1706 * Check first for directly registered encoding names
1708 for (i
= 0;i
< 99;i
++) {
1709 upper
[i
] = toupper(name
[i
]);
1710 if (upper
[i
] == 0) break;
1714 if (handlers
!= NULL
) {
1715 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
1716 if (!strcmp(upper
, handlers
[i
]->name
)) {
1717 #ifdef DEBUG_ENCODING
1718 xmlGenericError(xmlGenericErrorContext
,
1719 "Found registered handler for encoding %s\n", name
);
1721 return(handlers
[i
]);
1726 #ifdef LIBXML_ICONV_ENABLED
1727 /* check whether iconv can handle this */
1728 icv_in
= iconv_open("UTF-8", name
);
1729 icv_out
= iconv_open(name
, "UTF-8");
1730 if (icv_in
== (iconv_t
) -1) {
1731 icv_in
= iconv_open("UTF-8", upper
);
1733 if (icv_out
== (iconv_t
) -1) {
1734 icv_out
= iconv_open(upper
, "UTF-8");
1736 if ((icv_in
!= (iconv_t
) -1) && (icv_out
!= (iconv_t
) -1)) {
1737 enc
= (xmlCharEncodingHandlerPtr
)
1738 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1740 iconv_close(icv_in
);
1741 iconv_close(icv_out
);
1744 memset(enc
, 0, sizeof(xmlCharEncodingHandler
));
1745 enc
->name
= xmlMemStrdup(name
);
1748 enc
->iconv_in
= icv_in
;
1749 enc
->iconv_out
= icv_out
;
1750 #ifdef DEBUG_ENCODING
1751 xmlGenericError(xmlGenericErrorContext
,
1752 "Found iconv handler for encoding %s\n", name
);
1755 } else if ((icv_in
!= (iconv_t
) -1) || icv_out
!= (iconv_t
) -1) {
1756 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1757 "iconv : problems with filters for '%s'\n", name
);
1758 if (icv_in
!= (iconv_t
) -1)
1759 iconv_close(icv_in
);
1761 iconv_close(icv_out
);
1763 #endif /* LIBXML_ICONV_ENABLED */
1764 #ifdef LIBXML_ICU_ENABLED
1765 /* check whether icu can handle this */
1766 ucv_in
= openIcuConverter(name
, 1);
1767 ucv_out
= openIcuConverter(name
, 0);
1768 if (ucv_in
!= NULL
&& ucv_out
!= NULL
) {
1769 encu
= (xmlCharEncodingHandlerPtr
)
1770 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1772 closeIcuConverter(ucv_in
);
1773 closeIcuConverter(ucv_out
);
1776 memset(encu
, 0, sizeof(xmlCharEncodingHandler
));
1777 encu
->name
= xmlMemStrdup(name
);
1779 encu
->output
= NULL
;
1780 encu
->uconv_in
= ucv_in
;
1781 encu
->uconv_out
= ucv_out
;
1782 #ifdef DEBUG_ENCODING
1783 xmlGenericError(xmlGenericErrorContext
,
1784 "Found ICU converter handler for encoding %s\n", name
);
1787 } else if (ucv_in
!= NULL
|| ucv_out
!= NULL
) {
1788 closeIcuConverter(ucv_in
);
1789 closeIcuConverter(ucv_out
);
1790 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1791 "ICU converter : problems with filters for '%s'\n", name
);
1793 #endif /* LIBXML_ICU_ENABLED */
1795 #ifdef DEBUG_ENCODING
1796 xmlGenericError(xmlGenericErrorContext
,
1797 "No handler found for encoding %s\n", name
);
1801 * Fallback using the canonical names
1803 alias
= xmlParseCharEncoding(norig
);
1804 if (alias
!= XML_CHAR_ENCODING_ERROR
) {
1806 canon
= xmlGetCharEncodingName(alias
);
1807 if ((canon
!= NULL
) && (strcmp(name
, canon
))) {
1808 return(xmlFindCharEncodingHandler(canon
));
1812 /* If "none of the above", give up */
1816 /************************************************************************
1818 * ICONV based generic conversion functions *
1820 ************************************************************************/
1822 #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run one iconv() conversion and translate its outcome into the return
 * codes used by the encoding layer.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft;
    size_t dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);

    /* iconv() leaves the unprocessed amounts in the counters. */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != (size_t) -1))
        return(0);

#ifdef EILSEQ
    if (errno == EILSEQ)
        return(-2);
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return(-1);
#endif
    /* EINVAL (truncated input) and any other error */
    return(-3);
}
1883 #endif /* LIBXML_ICONV_ENABLED */
1885 /************************************************************************
1887 * ICU based generic conversion functions *
1889 ************************************************************************/
1891 #ifdef LIBXML_ICU_ENABLED
1894 * @cd: ICU uconverter data structure
1895 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1896 * @out: a pointer to an array of bytes to store the result
1897 * @outlen: the length of @out
1898 * @in: a pointer to an array of input bytes
1899 * @inlen: the length of @in
1900 * @flush: if true, indicates end of input
1902 * Returns 0 if success, or
1903 * -1 by lack of space, or
1904 * -2 if the transcoding fails (for *in is not valid utf8 string or
1905 * the result of transformation can't fit into the encoding we want), or
1906 * -3 if there the last byte can't form a single output char.
1908 * The value of @inlen after return is the number of octets consumed
1909 * as the return value is positive, else unpredictable.
1910 * The value of @outlen after return is the number of octets produced.
1913 xmlUconvWrapper(uconv_t
*cd
, int toUnicode
, unsigned char *out
, int *outlen
,
1914 const unsigned char *in
, int *inlen
, int flush
) {
1915 const char *ucv_in
= (const char *) in
;
1916 char *ucv_out
= (char *) out
;
1917 UErrorCode err
= U_ZERO_ERROR
;
1919 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) || (in
== NULL
)) {
1920 if (outlen
!= NULL
) *outlen
= 0;
1925 /* encoding => UTF-16 => UTF-8 */
1926 ucnv_convertEx(cd
->utf8
, cd
->uconv
, &ucv_out
, ucv_out
+ *outlen
,
1927 &ucv_in
, ucv_in
+ *inlen
, cd
->pivot_buf
,
1928 &cd
->pivot_source
, &cd
->pivot_target
,
1929 cd
->pivot_buf
+ ICU_PIVOT_BUF_SIZE
, 0, flush
, &err
);
1931 /* UTF-8 => UTF-16 => encoding */
1932 ucnv_convertEx(cd
->uconv
, cd
->utf8
, &ucv_out
, ucv_out
+ *outlen
,
1933 &ucv_in
, ucv_in
+ *inlen
, cd
->pivot_buf
,
1934 &cd
->pivot_source
, &cd
->pivot_target
,
1935 cd
->pivot_buf
+ ICU_PIVOT_BUF_SIZE
, 0, flush
, &err
);
1937 *inlen
= ucv_in
- (const char*) in
;
1938 *outlen
= ucv_out
- (char *) out
;
1939 if (U_SUCCESS(err
)) {
1940 /* reset pivot buf if this is the last call for input (flush==TRUE) */
1942 cd
->pivot_source
= cd
->pivot_target
= cd
->pivot_buf
;
1945 if (err
== U_BUFFER_OVERFLOW_ERROR
)
1947 if (err
== U_INVALID_CHAR_FOUND
|| err
== U_ILLEGAL_CHAR_FOUND
)
1951 #endif /* LIBXML_ICU_ENABLED */
1953 /************************************************************************
1955 * The real API used by libxml for on-the-fly conversion *
1957 ************************************************************************/
1961 * @handler: encoding handler
1962 * @out: a pointer to an array of bytes to store the result
1963 * @outlen: the length of @out
1964 * @in: a pointer to an array of input bytes
1965 * @inlen: the length of @in
1966 * @flush: flush (ICU-related)
1968 * Returns 0 if success, or
1969 * -1 by lack of space, or
1970 * -2 if the transcoding fails (for *in is not valid utf8 string or
1971 * the result of transformation can't fit into the encoding we want), or
1972 * -3 if there the last byte can't form a single output char.
1974 * The value of @inlen after return is the number of octets consumed
1975 * as the return value is 0, else unpredictable.
1976 * The value of @outlen after return is the number of octets produced.
1979 xmlEncInputChunk(xmlCharEncodingHandler
*handler
, unsigned char *out
,
1980 int *outlen
, const unsigned char *in
, int *inlen
, int flush
) {
1984 if (handler
->input
!= NULL
) {
1985 ret
= handler
->input(out
, outlen
, in
, inlen
);
1989 #ifdef LIBXML_ICONV_ENABLED
1990 else if (handler
->iconv_in
!= NULL
) {
1991 ret
= xmlIconvWrapper(handler
->iconv_in
, out
, outlen
, in
, inlen
);
1993 #endif /* LIBXML_ICONV_ENABLED */
1994 #ifdef LIBXML_ICU_ENABLED
1995 else if (handler
->uconv_in
!= NULL
) {
1996 ret
= xmlUconvWrapper(handler
->uconv_in
, 1, out
, outlen
, in
, inlen
,
1999 #endif /* LIBXML_ICU_ENABLED */
2010 * xmlEncOutputChunk:
2011 * @handler: encoding handler
2012 * @out: a pointer to an array of bytes to store the result
2013 * @outlen: the length of @out
2014 * @in: a pointer to an array of input bytes
2015 * @inlen: the length of @in
2017 * Returns 0 if success, or
2018 * -1 by lack of space, or
2019 * -2 if the transcoding fails (for *in is not valid utf8 string or
2020 * the result of transformation can't fit into the encoding we want), or
2021 * -3 if there the last byte can't form a single output char.
2022 * -4 if no output function was found.
2024 * The value of @inlen after return is the number of octets consumed
2025 * as the return value is 0, else unpredictable.
2026 * The value of @outlen after return is the number of octets produced.
2029 xmlEncOutputChunk(xmlCharEncodingHandler
*handler
, unsigned char *out
,
2030 int *outlen
, const unsigned char *in
, int *inlen
) {
2033 if (handler
->output
!= NULL
) {
2034 ret
= handler
->output(out
, outlen
, in
, inlen
);
2038 #ifdef LIBXML_ICONV_ENABLED
2039 else if (handler
->iconv_out
!= NULL
) {
2040 ret
= xmlIconvWrapper(handler
->iconv_out
, out
, outlen
, in
, inlen
);
2042 #endif /* LIBXML_ICONV_ENABLED */
2043 #ifdef LIBXML_ICU_ENABLED
2044 else if (handler
->uconv_out
!= NULL
) {
2045 ret
= xmlUconvWrapper(handler
->uconv_out
, 0, out
, outlen
, in
, inlen
,
2048 #endif /* LIBXML_ICU_ENABLED */
2059 * xmlCharEncFirstLineInt:
2060 * @handler: char encoding transformation data structure
2061 * @out: an xmlBuffer for the output.
2062 * @in: an xmlBuffer for the input
2063 * @len: number of bytes to convert for the first line, or -1
2065 * Front-end for the encoding handler input function, but handle only
2066 * the very first line, i.e. limit itself to 45 chars.
2068 * Returns the number of byte written if success, or
2070 * -2 if the transcoding fails (for *in is not valid utf8 string or
2071 * the result of transformation can't fit into the encoding we want), or
2074 xmlCharEncFirstLineInt(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2075 xmlBufferPtr in
, int len
) {
2080 if (handler
== NULL
) return(-1);
2081 if (out
== NULL
) return(-1);
2082 if (in
== NULL
) return(-1);
2084 /* calculate space available */
2085 written
= out
->size
- out
->use
- 1; /* count '\0' */
2088 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089 * 45 chars should be sufficient to reach the end of the encoding
2090 * declaration without going too far inside the document content.
2091 * on UTF-16 this means 90bytes, on UCS4 this means 180
2092 * The actual value depending on guessed encoding is passed as @len
2102 if (toconv
* 2 >= written
) {
2103 xmlBufferGrow(out
, toconv
* 2);
2104 written
= out
->size
- out
->use
- 1;
2107 ret
= xmlEncInputChunk(handler
, &out
->content
[out
->use
], &written
,
2108 in
->content
, &toconv
, 0);
2109 xmlBufferShrink(in
, toconv
);
2110 out
->use
+= written
;
2111 out
->content
[out
->use
] = 0;
2112 if (ret
== -1) ret
= -3;
2114 #ifdef DEBUG_ENCODING
2117 xmlGenericError(xmlGenericErrorContext
,
2118 "converted %d bytes to %d bytes of input\n",
2122 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of input, %d left\n",
2123 toconv
, written
, in
->use
);
2126 xmlGenericError(xmlGenericErrorContext
,
2127 "input conversion failed due to input error\n");
2130 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of input, %d left\n",
2131 toconv
, written
, in
->use
);
2134 xmlGenericError(xmlGenericErrorContext
,"Unknown input conversion failed %d\n", ret
);
2136 #endif /* DEBUG_ENCODING */
2138 * Ignore when input buffer is not on a boundary
2140 if (ret
== -3) ret
= 0;
2141 if (ret
== -1) ret
= 0;
2142 return(written
? written
: ret
);
2146 * xmlCharEncFirstLine:
2147 * @handler: char encoding transformation data structure
2148 * @out: an xmlBuffer for the output.
2149 * @in: an xmlBuffer for the input
2151 * Front-end for the encoding handler input function, but handle only
2152 * the very first line, i.e. limit itself to 45 chars.
2154 * Returns the number of byte written if success, or
2156 * -2 if the transcoding fails (for *in is not valid utf8 string or
2157 * the result of transformation can't fit into the encoding we want), or
2160 xmlCharEncFirstLine(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2162 return(xmlCharEncFirstLineInt(handler
, out
, in
, -1));
2166 * xmlCharEncFirstLineInput:
2167 * @input: a parser input buffer
2168 * @len: number of bytes to convert for the first line, or -1
2170 * Front-end for the encoding handler input function, but handle only
2171 * the very first line. Point is that this is based on autodetection
2172 * of the encoding and once that first line is converted we may find
2173 * out that a different decoder is needed to process the input.
2175 * Returns the number of byte written if success, or
2177 * -2 if the transcoding fails (for *in is not valid utf8 string or
2178 * the result of transformation can't fit into the encoding we want), or
2181 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input
, int len
)
2191 if ((input
== NULL
) || (input
->encoder
== NULL
) ||
2192 (input
->buffer
== NULL
) || (input
->raw
== NULL
))
2194 out
= input
->buffer
;
2197 toconv
= xmlBufUse(in
);
2200 written
= xmlBufAvail(out
);
2202 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203 * 45 chars should be sufficient to reach the end of the encoding
2204 * declaration without going too far inside the document content.
2205 * on UTF-16 this means 90bytes, on UCS4 this means 180
2206 * The actual value depending on guessed encoding is passed as @len
2210 if (toconv
> (unsigned int) len
)
2216 if (toconv
* 2 >= written
) {
2217 xmlBufGrow(out
, toconv
* 2);
2218 written
= xmlBufAvail(out
);
2225 ret
= xmlEncInputChunk(input
->encoder
, xmlBufEnd(out
), &c_out
,
2226 xmlBufContent(in
), &c_in
, 0);
2227 xmlBufShrink(in
, c_in
);
2228 xmlBufAddLen(out
, c_out
);
2234 #ifdef DEBUG_ENCODING
2235 xmlGenericError(xmlGenericErrorContext
,
2236 "converted %d bytes to %d bytes of input\n",
2241 #ifdef DEBUG_ENCODING
2242 xmlGenericError(xmlGenericErrorContext
,
2243 "converted %d bytes to %d bytes of input, %d left\n",
2244 c_in
, c_out
, (int)xmlBufUse(in
));
2248 #ifdef DEBUG_ENCODING
2249 xmlGenericError(xmlGenericErrorContext
,
2250 "converted %d bytes to %d bytes of input, %d left\n",
2251 c_in
, c_out
, (int)xmlBufUse(in
));
2256 const xmlChar
*content
= xmlBufContent(in
);
2258 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259 content
[0], content
[1],
2260 content
[2], content
[3]);
2262 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2263 "input conversion failed due to input error, bytes %s\n",
2268 * Ignore when input buffer is not on a boundary
2270 if (ret
== -3) ret
= 0;
2271 if (ret
== -1) ret
= 0;
2272 return(c_out
? c_out
: ret
);
2277 * @input: a parser input buffer
2278 * @flush: try to flush all the raw buffer
2280 * Generic front-end for the encoding handler on parser input
2282 * Returns the number of byte written if success, or
2284 * -2 if the transcoding fails (for *in is not valid utf8 string or
2285 * the result of transformation can't fit into the encoding we want), or
2288 xmlCharEncInput(xmlParserInputBufferPtr input
, int flush
)
2298 if ((input
== NULL
) || (input
->encoder
== NULL
) ||
2299 (input
->buffer
== NULL
) || (input
->raw
== NULL
))
2301 out
= input
->buffer
;
2304 toconv
= xmlBufUse(in
);
2307 if ((toconv
> 64 * 1024) && (flush
== 0))
2309 written
= xmlBufAvail(out
);
2310 if (toconv
* 2 >= written
) {
2311 xmlBufGrow(out
, toconv
* 2);
2312 written
= xmlBufAvail(out
);
2314 if ((written
> 128 * 1024) && (flush
== 0))
2315 written
= 128 * 1024;
2319 ret
= xmlEncInputChunk(input
->encoder
, xmlBufEnd(out
), &c_out
,
2320 xmlBufContent(in
), &c_in
, flush
);
2321 xmlBufShrink(in
, c_in
);
2322 xmlBufAddLen(out
, c_out
);
2328 #ifdef DEBUG_ENCODING
2329 xmlGenericError(xmlGenericErrorContext
,
2330 "converted %d bytes to %d bytes of input\n",
2335 #ifdef DEBUG_ENCODING
2336 xmlGenericError(xmlGenericErrorContext
,
2337 "converted %d bytes to %d bytes of input, %d left\n",
2338 c_in
, c_out
, (int)xmlBufUse(in
));
2342 #ifdef DEBUG_ENCODING
2343 xmlGenericError(xmlGenericErrorContext
,
2344 "converted %d bytes to %d bytes of input, %d left\n",
2345 c_in
, c_out
, (int)xmlBufUse(in
));
2350 const xmlChar
*content
= xmlBufContent(in
);
2352 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353 content
[0], content
[1],
2354 content
[2], content
[3]);
2356 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2357 "input conversion failed due to input error, bytes %s\n",
2362 * Ignore when input buffer is not on a boundary
2366 return (c_out
? c_out
: ret
);
2371 * @handler: char encoding transformation data structure
2372 * @out: an xmlBuffer for the output.
2373 * @in: an xmlBuffer for the input
2375 * Generic front-end for the encoding handler input function
2377 * Returns the number of byte written if success, or
2379 * -2 if the transcoding fails (for *in is not valid utf8 string or
2380 * the result of transformation can't fit into the encoding we want), or
2383 xmlCharEncInFunc(xmlCharEncodingHandler
* handler
, xmlBufferPtr out
,
2390 if (handler
== NULL
)
2400 written
= out
->size
- out
->use
-1; /* count '\0' */
2401 if (toconv
* 2 >= written
) {
2402 xmlBufferGrow(out
, out
->size
+ toconv
* 2);
2403 written
= out
->size
- out
->use
- 1;
2405 ret
= xmlEncInputChunk(handler
, &out
->content
[out
->use
], &written
,
2406 in
->content
, &toconv
, 1);
2407 xmlBufferShrink(in
, toconv
);
2408 out
->use
+= written
;
2409 out
->content
[out
->use
] = 0;
2415 #ifdef DEBUG_ENCODING
2416 xmlGenericError(xmlGenericErrorContext
,
2417 "converted %d bytes to %d bytes of input\n",
2422 #ifdef DEBUG_ENCODING
2423 xmlGenericError(xmlGenericErrorContext
,
2424 "converted %d bytes to %d bytes of input, %d left\n",
2425 toconv
, written
, in
->use
);
2429 #ifdef DEBUG_ENCODING
2430 xmlGenericError(xmlGenericErrorContext
,
2431 "converted %d bytes to %d bytes of input, %d left\n",
2432 toconv
, written
, in
->use
);
2438 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439 in
->content
[0], in
->content
[1],
2440 in
->content
[2], in
->content
[3]);
2442 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2443 "input conversion failed due to input error, bytes %s\n",
2448 * Ignore when input buffer is not on a boundary
2452 return (written
? written
: ret
);
2455 #ifdef LIBXML_OUTPUT_ENABLED
2458 * @output: a parser output buffer
2459 * @init: is this an initialization call without data
2461 * Generic front-end for the encoding handler on parser output
2462 * a first call with @init == 1 has to be made first to initiate the
2463 * output in case of non-stateless encoding needing to initiate their
2464 * state or the output (like the BOM in UTF16).
2465 * In case of UTF8 sequence conversion errors for the given encoder,
2466 * the content will be automatically remapped to a CharRef sequence.
2468 * Returns the number of byte written if success, or
2470 * -2 if the transcoding fails (for *in is not valid utf8 string or
2471 * the result of transformation can't fit into the encoding we want), or
2474 xmlCharEncOutput(xmlOutputBufferPtr output
, int init
)
2485 if ((output
== NULL
) || (output
->encoder
== NULL
) ||
2486 (output
->buffer
== NULL
) || (output
->conv
== NULL
))
2489 in
= output
->buffer
;
2493 written
= xmlBufAvail(out
);
2496 * First specific handling of the initialization call
2501 /* TODO: Check return value. */
2502 xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2504 xmlBufAddLen(out
, c_out
);
2505 #ifdef DEBUG_ENCODING
2506 xmlGenericError(xmlGenericErrorContext
,
2507 "initialized encoder\n");
2513 * Conversion itself.
2515 toconv
= xmlBufUse(in
);
2517 return (writtentot
);
2518 if (toconv
> 64 * 1024)
2520 if (toconv
* 4 >= written
) {
2521 xmlBufGrow(out
, toconv
* 4);
2522 written
= xmlBufAvail(out
);
2524 if (written
> 256 * 1024)
2525 written
= 256 * 1024;
2529 ret
= xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2530 xmlBufContent(in
), &c_in
);
2531 xmlBufShrink(in
, c_in
);
2532 xmlBufAddLen(out
, c_out
);
2533 writtentot
+= c_out
;
2536 /* Can be a limitation of iconv or uconv */
2543 * Attempt to handle error cases
2547 #ifdef DEBUG_ENCODING
2548 xmlGenericError(xmlGenericErrorContext
,
2549 "converted %d bytes to %d bytes of output\n",
2554 #ifdef DEBUG_ENCODING
2555 xmlGenericError(xmlGenericErrorContext
,
2556 "output conversion failed by lack of space\n");
2560 #ifdef DEBUG_ENCODING
2561 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2562 c_in
, c_out
, (int) xmlBufUse(in
));
2566 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2567 "xmlCharEncOutFunc: no output function !\n", NULL
);
2571 xmlChar charref
[20];
2572 int len
= (int) xmlBufUse(in
);
2573 xmlChar
*content
= xmlBufContent(in
);
2574 int cur
, charrefLen
;
2576 cur
= xmlGetUTF8Char(content
, &len
);
2580 #ifdef DEBUG_ENCODING
2581 xmlGenericError(xmlGenericErrorContext
,
2582 "handling output conversion error\n");
2583 xmlGenericError(xmlGenericErrorContext
,
2584 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2585 content
[0], content
[1],
2586 content
[2], content
[3]);
2589 * Removes the UTF8 sequence, and replace it by a charref
2590 * and continue the transcoding phase, hoping the error
2591 * did not mangle the encoder state.
2593 charrefLen
= snprintf((char *) &charref
[0], sizeof(charref
),
2595 xmlBufShrink(in
, len
);
2596 xmlBufGrow(out
, charrefLen
* 4);
2597 c_out
= xmlBufAvail(out
);
2599 ret
= xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2602 if ((ret
< 0) || (c_in
!= charrefLen
)) {
2605 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2606 content
[0], content
[1],
2607 content
[2], content
[3]);
2609 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2610 "output conversion failed due to conv error, bytes %s\n",
2612 if (xmlBufGetAllocationScheme(in
) != XML_BUFFER_ALLOC_IMMUTABLE
)
2617 xmlBufAddLen(out
, c_out
);
2618 writtentot
+= c_out
;
2622 return(writtentot
? writtentot
: ret
);
2627 * xmlCharEncOutFunc:
2628 * @handler: char encoding transformation data structure
2629 * @out: an xmlBuffer for the output.
2630 * @in: an xmlBuffer for the input
2632 * Generic front-end for the encoding handler output function
2633 * a first call with @in == NULL has to be made firs to initiate the
2634 * output in case of non-stateless encoding needing to initiate their
2635 * state or the output (like the BOM in UTF16).
2636 * In case of UTF8 sequence conversion errors for the given encoder,
2637 * the content will be automatically remapped to a CharRef sequence.
2639 * Returns the number of byte written if success, or
2641 * -2 if the transcoding fails (for *in is not valid utf8 string or
2642 * the result of transformation can't fit into the encoding we want), or
2645 xmlCharEncOutFunc(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2652 if (handler
== NULL
) return(-1);
2653 if (out
== NULL
) return(-1);
2657 written
= out
->size
- out
->use
;
2660 written
--; /* Gennady: count '/0' */
2663 * First specific handling of in = NULL, i.e. the initialization call
2667 /* TODO: Check return value. */
2668 xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2670 out
->use
+= written
;
2671 out
->content
[out
->use
] = 0;
2672 #ifdef DEBUG_ENCODING
2673 xmlGenericError(xmlGenericErrorContext
,
2674 "initialized encoder\n");
2680 * Conversion itself.
2685 if (toconv
* 4 >= written
) {
2686 xmlBufferGrow(out
, toconv
* 4);
2687 written
= out
->size
- out
->use
- 1;
2689 ret
= xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2690 in
->content
, &toconv
);
2691 xmlBufferShrink(in
, toconv
);
2692 out
->use
+= written
;
2693 writtentot
+= written
;
2694 out
->content
[out
->use
] = 0;
2697 /* Can be a limitation of iconv or uconv */
2704 * Attempt to handle error cases
2708 #ifdef DEBUG_ENCODING
2709 xmlGenericError(xmlGenericErrorContext
,
2710 "converted %d bytes to %d bytes of output\n",
2715 #ifdef DEBUG_ENCODING
2716 xmlGenericError(xmlGenericErrorContext
,
2717 "output conversion failed by lack of space\n");
2721 #ifdef DEBUG_ENCODING
2722 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2723 toconv
, written
, in
->use
);
2727 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2728 "xmlCharEncOutFunc: no output function !\n", NULL
);
2732 xmlChar charref
[20];
2734 const xmlChar
*utf
= (const xmlChar
*) in
->content
;
2735 int cur
, charrefLen
;
2737 cur
= xmlGetUTF8Char(utf
, &len
);
2741 #ifdef DEBUG_ENCODING
2742 xmlGenericError(xmlGenericErrorContext
,
2743 "handling output conversion error\n");
2744 xmlGenericError(xmlGenericErrorContext
,
2745 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746 in
->content
[0], in
->content
[1],
2747 in
->content
[2], in
->content
[3]);
2750 * Removes the UTF8 sequence, and replace it by a charref
2751 * and continue the transcoding phase, hoping the error
2752 * did not mangle the encoder state.
2754 charrefLen
= snprintf((char *) &charref
[0], sizeof(charref
),
2756 xmlBufferShrink(in
, len
);
2757 xmlBufferGrow(out
, charrefLen
* 4);
2758 written
= out
->size
- out
->use
- 1;
2759 toconv
= charrefLen
;
2760 ret
= xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2763 if ((ret
< 0) || (toconv
!= charrefLen
)) {
2766 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767 in
->content
[0], in
->content
[1],
2768 in
->content
[2], in
->content
[3]);
2770 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2771 "output conversion failed due to conv error, bytes %s\n",
2773 if (in
->alloc
!= XML_BUFFER_ALLOC_IMMUTABLE
)
2774 in
->content
[0] = ' ';
2778 out
->use
+= written
;
2779 writtentot
+= written
;
2780 out
->content
[out
->use
] = 0;
2784 return(writtentot
? writtentot
: ret
);
2788 * xmlCharEncCloseFunc:
2789 * @handler: char encoding transformation data structure
2791 * Generic front-end for encoding handler close function
2793 * Returns 0 if success, or -1 in case of error
2796 xmlCharEncCloseFunc(xmlCharEncodingHandler
*handler
) {
2799 int i
, handler_in_list
= 0;
2801 /* Avoid unused variable warning if features are disabled. */
2802 (void) handler_in_list
;
2804 if (handler
== NULL
) return(-1);
2805 if (handler
->name
== NULL
) return(-1);
2806 if (handlers
!= NULL
) {
2807 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
2808 if (handler
== handlers
[i
]) {
2809 handler_in_list
= 1;
2814 #ifdef LIBXML_ICONV_ENABLED
2816 * Iconv handlers can be used only once, free the whole block.
2817 * and the associated icon resources.
2819 if ((handler_in_list
== 0) &&
2820 ((handler
->iconv_out
!= NULL
) || (handler
->iconv_in
!= NULL
))) {
2822 if (handler
->iconv_out
!= NULL
) {
2823 if (iconv_close(handler
->iconv_out
))
2825 handler
->iconv_out
= NULL
;
2827 if (handler
->iconv_in
!= NULL
) {
2828 if (iconv_close(handler
->iconv_in
))
2830 handler
->iconv_in
= NULL
;
2833 #endif /* LIBXML_ICONV_ENABLED */
2834 #ifdef LIBXML_ICU_ENABLED
2835 if ((handler_in_list
== 0) &&
2836 ((handler
->uconv_out
!= NULL
) || (handler
->uconv_in
!= NULL
))) {
2838 if (handler
->uconv_out
!= NULL
) {
2839 closeIcuConverter(handler
->uconv_out
);
2840 handler
->uconv_out
= NULL
;
2842 if (handler
->uconv_in
!= NULL
) {
2843 closeIcuConverter(handler
->uconv_in
);
2844 handler
->uconv_in
= NULL
;
2849 /* free up only dynamic handlers iconv/uconv */
2850 if (handler
->name
!= NULL
)
2851 xmlFree(handler
->name
);
2852 handler
->name
= NULL
;
2855 #ifdef DEBUG_ENCODING
2857 xmlGenericError(xmlGenericErrorContext
,
2858 "failed to close the encoding handler\n");
2860 xmlGenericError(xmlGenericErrorContext
,
2861 "closed the encoding handler\n");
2869 * @ctxt: an XML parser context
2871 * This function provides the current index of the parser relative
2872 * to the start of the current entity. This function is computed in
2873 * bytes from the beginning starting at zero and finishing at the
2874 * size in byte of the file if parsing a file. The function is
2875 * of constant cost if the input is UTF-8 but can be costly if run
2876 * on non-UTF-8 input.
2878 * Returns the index in bytes from the beginning of the entity or -1
2879 * in case the index could not be computed.
2882 xmlByteConsumed(xmlParserCtxtPtr ctxt
) {
2883 xmlParserInputPtr in
;
2885 if (ctxt
== NULL
) return(-1);
2887 if (in
== NULL
) return(-1);
2888 if ((in
->buf
!= NULL
) && (in
->buf
->encoder
!= NULL
)) {
2889 unsigned int unused
= 0;
2890 xmlCharEncodingHandler
* handler
= in
->buf
->encoder
;
2892 * Encoding conversion, compute the number of unused original
2893 * bytes from the input not consumed and subtract that from
2894 * the raw consumed value, this is not a cheap operation
2896 if (in
->end
- in
->cur
> 0) {
2897 unsigned char convbuf
[32000];
2898 const unsigned char *cur
= (const unsigned char *)in
->cur
;
2899 int toconv
= in
->end
- in
->cur
, written
= 32000;
2904 toconv
= in
->end
- cur
;
2906 ret
= xmlEncOutputChunk(handler
, &convbuf
[0], &written
,
2916 } while (ret
== -2);
2918 if (in
->buf
->rawconsumed
< unused
)
2920 return(in
->buf
->rawconsumed
- unused
);
2922 return(in
->consumed
+ (in
->cur
- in
->base
));
2925 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2926 #ifdef LIBXML_ISO8859X_ENABLED
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every character produces exactly one output byte;
 * conversion stops without error once @out is full.
 *
 * @xlattable starts with 48 index bytes (32 for the lead bytes of 2-byte
 * sequences, 16 for the lead bytes of 3-byte sequences) followed by
 * blocks of 64 bytes; a final value of 0 means "not in character set".
 *
 * Returns the number of bytes written if success,
 *     -1 if @out, @outlen, @inlen or @xlattable is NULL,
 *     -2 if the transcoding fails (invalid UTF-8 or a character which is
 *        not in the target character set),
 *     -3 if the input ends in the middle of a multi-byte sequence.
 * Except for the -1 case, the value of @inlen after return is the number
 * of octets consumed (whole characters only) and the value of @outlen
 * after return is the number of octets produced.
 * A NULL @in is the initialization call: nothing is done, both lengths
 * are set to 0 and 0 is returned.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);

    /* One output byte per character: never write past the end of @out. */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            /* plain ASCII, copied as is */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable[48 + c + xlattable[d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable[48 + c2 + xlattable[48 + c1 +
                        xlattable[32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        *out++ = d;
        processed = in;
    }

done:
    /* Only fully converted characters count as consumed. */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  table indexed by (byte - 0x80) giving the code point
 *                 of each byte >= 0x80; 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * has no room left for the next character.
 *
 * Returns the number of bytes written to @out if success, or -1 if an
 *     argument is NULL or an undefined code point is found.
 * Unless an argument is NULL:
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * The main loop requires room for a 3-byte sequence. The test is
     * written on the remaining size so that no pointer before the
     * start of @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable[*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return(-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Copy a run of ASCII bytes, limited by the space left in @out. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Check the bound first: *in must not be read at or past instop. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Fewer than 3 bytes left in @out: up to two ASCII bytes still fit. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
3114 /************************************************************************
3115 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3116 ************************************************************************/
3118 static unsigned short const xmlunicodetable_ISO8859_2
[128] = {
3119 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3120 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3121 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3122 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3123 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3124 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3125 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3126 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3127 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3128 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3129 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3130 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3131 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3132 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3133 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3134 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3137 static const unsigned char xmltranscodetable_ISO8859_2
[48 + 6 * 64] = {
3138 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3139 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3144 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3146 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3147 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3148 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3149 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3150 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3151 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3153 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3154 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3158 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3159 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3160 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3161 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3162 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3163 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3164 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3167 static unsigned short const xmlunicodetable_ISO8859_3
[128] = {
3168 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3169 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3170 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3171 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3172 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3173 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3174 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3175 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3176 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3177 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3178 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3179 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3180 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3181 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3182 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3183 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3186 static const unsigned char xmltranscodetable_ISO8859_3
[48 + 7 * 64] = {
3187 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3188 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3195 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3196 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3197 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3198 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3199 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3200 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3201 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3214 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3215 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3216 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3217 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3220 static unsigned short const xmlunicodetable_ISO8859_4
[128] = {
3221 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3222 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3223 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3224 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3225 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3226 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3227 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3228 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3229 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3230 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3231 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3232 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3233 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3234 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3235 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3236 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3239 static const unsigned char xmltranscodetable_ISO8859_4
[48 + 6 * 64] = {
3240 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3248 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3249 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3250 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3251 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3252 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3253 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3254 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3255 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3256 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3258 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3259 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3264 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3265 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3266 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3269 static unsigned short const xmlunicodetable_ISO8859_5
[128] = {
3270 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3271 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3272 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3273 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3274 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3275 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3276 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3277 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3278 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3279 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3280 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3281 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3282 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3283 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3284 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3285 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3288 static const unsigned char xmltranscodetable_ISO8859_5
[48 + 6 * 64] = {
3289 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3297 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3298 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3299 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3300 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3301 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3302 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3303 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3304 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3305 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3306 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 static unsigned short const xmlunicodetable_ISO8859_6
[128] = {
3319 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3320 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3321 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3322 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3323 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3324 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3325 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3326 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3327 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3328 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3329 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3330 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3331 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3332 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3333 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3334 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3337 static const unsigned char xmltranscodetable_ISO8859_6
[48 + 5 * 64] = {
3338 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3340 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3346 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3347 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3348 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3354 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3355 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3356 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3357 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3358 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 static unsigned short const xmlunicodetable_ISO8859_7
[128] = {
3364 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3365 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3366 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3367 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3368 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3369 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3370 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3371 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3372 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3373 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3374 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3375 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3376 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3377 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3378 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3379 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3382 static const unsigned char xmltranscodetable_ISO8859_7
[48 + 7 * 64] = {
3383 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3390 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3391 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3392 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3393 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3394 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3400 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3407 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3408 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3409 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3410 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_8: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_8ToUTF8.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i, and 0x0000 marks a byte with no mapping -- confirm
 * against ISO8859xToUTF8 before editing any entry by hand.
 */
3416 static unsigned short const xmlunicodetable_ISO8859_8
[128] = {
3417 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3418 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3419 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3420 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3421 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3422 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3423 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3424 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3425 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3426 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3427 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3428 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3429 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3430 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3431 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3432 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
/*
 * xmltranscodetable_ISO8859_8: 48 + 7 * 64 = 496 bytes of lookup data,
 * handed to UTF8ToISO8859x by UTF8ToISO8859_8. The 31 adjacent string
 * literals supply exactly 496 characters, so the array is filled with
 * no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3435 static const unsigned char xmltranscodetable_ISO8859_8
[48 + 7 * 64] = {
3436 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3444 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3445 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3446 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3447 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3451 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3455 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3460 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3461 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3465 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3466 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_9: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_9ToUTF8. Every entry is non-zero.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i -- confirm against ISO8859xToUTF8 before editing any
 * entry by hand.
 */
3469 static unsigned short const xmlunicodetable_ISO8859_9
[128] = {
3470 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3471 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3472 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3473 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3474 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3475 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3476 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3477 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3478 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3479 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3480 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3481 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3482 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3483 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3484 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3485 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
/*
 * xmltranscodetable_ISO8859_9: 48 + 5 * 64 = 368 bytes of lookup data,
 * handed to UTF8ToISO8859x by UTF8ToISO8859_9. The 23 adjacent string
 * literals supply exactly 368 characters, so the array is filled with
 * no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3488 static const unsigned char xmltranscodetable_ISO8859_9
[48 + 5 * 64] = {
3489 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3497 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3498 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3499 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3500 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3501 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3502 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3503 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3504 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_10: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_10ToUTF8. Every entry is non-zero.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i -- confirm against ISO8859xToUTF8 before editing any
 * entry by hand.
 */
3514 static unsigned short const xmlunicodetable_ISO8859_10
[128] = {
3515 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3516 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3517 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3518 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3519 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3520 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3521 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3522 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3523 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3524 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3525 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3526 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3527 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3528 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3529 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3530 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
/*
 * xmltranscodetable_ISO8859_10: 48 + 7 * 64 = 496 bytes of lookup data,
 * handed to UTF8ToISO8859x by UTF8ToISO8859_10. The 31 adjacent string
 * literals supply exactly 496 characters, so the array is filled with
 * no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3533 static const unsigned char xmltranscodetable_ISO8859_10
[48 + 7 * 64] = {
3534 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3542 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3543 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3544 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3545 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3546 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3547 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3548 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3549 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3552 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3553 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3554 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3562 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3563 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3564 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
/*
 * xmlunicodetable_ISO8859_11: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_11ToUTF8.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i, and 0x0000 marks a byte with no mapping -- confirm
 * against ISO8859xToUTF8 before editing any entry by hand.
 */
3567 static unsigned short const xmlunicodetable_ISO8859_11
[128] = {
3568 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3569 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3570 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3571 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3572 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3573 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3574 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3575 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3576 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3577 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3578 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3579 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3580 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3581 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3582 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3583 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
/*
 * xmltranscodetable_ISO8859_11: 48 + 6 * 64 = 432 bytes of lookup data,
 * handed to UTF8ToISO8859x by UTF8ToISO8859_11. The 27 adjacent string
 * literals supply exactly 432 characters, so the array is filled with
 * no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3586 static const unsigned char xmltranscodetable_ISO8859_11
[48 + 6 * 64] = {
3587 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3595 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3596 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3598 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3602 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3603 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3604 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3605 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3606 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3607 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3611 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_13: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_13ToUTF8. Every entry is non-zero.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i -- confirm against ISO8859xToUTF8 before editing any
 * entry by hand.
 */
3616 static unsigned short const xmlunicodetable_ISO8859_13
[128] = {
3617 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3618 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3619 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3620 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3621 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3622 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3623 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3624 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3625 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3626 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3627 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3628 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3629 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3630 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3631 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3632 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
/*
 * xmltranscodetable_ISO8859_13: 48 + 7 * 64 = 496 bytes of lookup data,
 * handed to UTF8ToISO8859x by UTF8ToISO8859_13. The 31 adjacent string
 * literals supply exactly 496 characters, so the array is filled with
 * no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3635 static const unsigned char xmltranscodetable_ISO8859_13
[48 + 7 * 64] = {
3636 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3644 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3645 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3646 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3647 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3656 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3657 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3658 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3659 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3660 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3661 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3662 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3663 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3664 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3665 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3666 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_14: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_14ToUTF8. Every entry is non-zero.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i -- confirm against ISO8859xToUTF8 before editing any
 * entry by hand.
 */
3669 static unsigned short const xmlunicodetable_ISO8859_14
[128] = {
3670 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3671 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3672 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3673 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3674 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3675 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3676 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3677 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3678 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3679 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3680 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3681 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3682 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3683 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3684 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3685 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
/*
 * xmltranscodetable_ISO8859_14: 48 + 10 * 64 = 688 bytes of lookup
 * data, handed to UTF8ToISO8859x by UTF8ToISO8859_14. The 43 adjacent
 * string literals supply exactly 688 characters, so the array is filled
 * with no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3688 static const unsigned char xmltranscodetable_ISO8859_14
[48 + 10 * 64] = {
3689 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3697 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3698 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3699 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3704 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3709 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3710 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3712 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3714 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3715 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3717 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3718 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3719 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3720 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3724 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3726 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3729 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3730 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3731 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
/*
 * xmlunicodetable_ISO8859_15: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_15ToUTF8. Every entry is non-zero.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i -- confirm against ISO8859xToUTF8 before editing any
 * entry by hand.
 */
3734 static unsigned short const xmlunicodetable_ISO8859_15
[128] = {
3735 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3736 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3737 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3738 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3739 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3740 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3741 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3742 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3743 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3744 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3745 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3746 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3747 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3748 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3749 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3750 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * xmltranscodetable_ISO8859_15: 48 + 6 * 64 = 432 bytes of lookup data,
 * handed to UTF8ToISO8859x by UTF8ToISO8859_15. The 27 adjacent string
 * literals supply exactly 432 characters, so the array is filled with
 * no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3753 static const unsigned char xmltranscodetable_ISO8859_15
[48 + 6 * 64] = {
3754 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3761 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3762 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3763 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3764 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3765 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3766 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3767 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3768 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3769 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3770 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3771 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3772 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3773 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3774 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3775 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3776 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3777 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3778 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3779 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3780 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * xmlunicodetable_ISO8859_16: 128 unsigned short entries, handed to
 * ISO8859xToUTF8 by ISO8859_16ToUTF8. Every entry is non-zero.
 * NOTE(review): presumably entry i holds the Unicode code point for
 * byte 0x80 + i -- confirm against ISO8859xToUTF8 before editing any
 * entry by hand.
 */
3783 static unsigned short const xmlunicodetable_ISO8859_16
[128] = {
3784 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3785 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3786 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3787 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3788 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3789 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3790 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3791 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3792 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3793 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3794 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3795 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3796 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3797 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3798 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3799 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
/*
 * xmltranscodetable_ISO8859_16: 48 + 9 * 64 = 624 bytes of lookup data,
 * handed to UTF8ToISO8859x by UTF8ToISO8859_16. The 39 adjacent string
 * literals supply exactly 624 characters, so the array is filled with
 * no room left for a terminating NUL.
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * 64-byte sub-blocks, with 0x00 presumably meaning "no mapping" --
 * confirm against UTF8ToISO8859x before editing any byte by hand.
 */
3802 static const unsigned char xmltranscodetable_ISO8859_16
[48 + 9 * 64] = {
3803 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3811 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3812 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3813 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3814 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3815 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3818 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3819 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3820 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3821 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3822 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3823 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3824 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3825 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3826 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3827 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3828 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3829 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3830 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3832 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3833 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3835 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3837 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3838 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3839 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3840 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3841 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3846 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3849 static int ISO8859_2ToUTF8 (unsigned char* out
, int *outlen
,
3850 const unsigned char* in
, int *inlen
) {
3851 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_2
);
3853 static int UTF8ToISO8859_2 (unsigned char* out
, int *outlen
,
3854 const unsigned char* in
, int *inlen
) {
3855 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_2
);
3858 static int ISO8859_3ToUTF8 (unsigned char* out
, int *outlen
,
3859 const unsigned char* in
, int *inlen
) {
3860 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_3
);
3862 static int UTF8ToISO8859_3 (unsigned char* out
, int *outlen
,
3863 const unsigned char* in
, int *inlen
) {
3864 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_3
);
3867 static int ISO8859_4ToUTF8 (unsigned char* out
, int *outlen
,
3868 const unsigned char* in
, int *inlen
) {
3869 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_4
);
3871 static int UTF8ToISO8859_4 (unsigned char* out
, int *outlen
,
3872 const unsigned char* in
, int *inlen
) {
3873 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_4
);
3876 static int ISO8859_5ToUTF8 (unsigned char* out
, int *outlen
,
3877 const unsigned char* in
, int *inlen
) {
3878 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_5
);
3880 static int UTF8ToISO8859_5 (unsigned char* out
, int *outlen
,
3881 const unsigned char* in
, int *inlen
) {
3882 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_5
);
3885 static int ISO8859_6ToUTF8 (unsigned char* out
, int *outlen
,
3886 const unsigned char* in
, int *inlen
) {
3887 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_6
);
3889 static int UTF8ToISO8859_6 (unsigned char* out
, int *outlen
,
3890 const unsigned char* in
, int *inlen
) {
3891 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_6
);
3894 static int ISO8859_7ToUTF8 (unsigned char* out
, int *outlen
,
3895 const unsigned char* in
, int *inlen
) {
3896 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_7
);
3898 static int UTF8ToISO8859_7 (unsigned char* out
, int *outlen
,
3899 const unsigned char* in
, int *inlen
) {
3900 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_7
);
3903 static int ISO8859_8ToUTF8 (unsigned char* out
, int *outlen
,
3904 const unsigned char* in
, int *inlen
) {
3905 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_8
);
3907 static int UTF8ToISO8859_8 (unsigned char* out
, int *outlen
,
3908 const unsigned char* in
, int *inlen
) {
3909 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_8
);
3912 static int ISO8859_9ToUTF8 (unsigned char* out
, int *outlen
,
3913 const unsigned char* in
, int *inlen
) {
3914 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_9
);
3916 static int UTF8ToISO8859_9 (unsigned char* out
, int *outlen
,
3917 const unsigned char* in
, int *inlen
) {
3918 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_9
);
3921 static int ISO8859_10ToUTF8 (unsigned char* out
, int *outlen
,
3922 const unsigned char* in
, int *inlen
) {
3923 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_10
);
3925 static int UTF8ToISO8859_10 (unsigned char* out
, int *outlen
,
3926 const unsigned char* in
, int *inlen
) {
3927 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_10
);
3930 static int ISO8859_11ToUTF8 (unsigned char* out
, int *outlen
,
3931 const unsigned char* in
, int *inlen
) {
3932 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_11
);
3934 static int UTF8ToISO8859_11 (unsigned char* out
, int *outlen
,
3935 const unsigned char* in
, int *inlen
) {
3936 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_11
);
3939 static int ISO8859_13ToUTF8 (unsigned char* out
, int *outlen
,
3940 const unsigned char* in
, int *inlen
) {
3941 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_13
);
3943 static int UTF8ToISO8859_13 (unsigned char* out
, int *outlen
,
3944 const unsigned char* in
, int *inlen
) {
3945 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_13
);
3948 static int ISO8859_14ToUTF8 (unsigned char* out
, int *outlen
,
3949 const unsigned char* in
, int *inlen
) {
3950 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_14
);
3952 static int UTF8ToISO8859_14 (unsigned char* out
, int *outlen
,
3953 const unsigned char* in
, int *inlen
) {
3954 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_14
);
3957 static int ISO8859_15ToUTF8 (unsigned char* out
, int *outlen
,
3958 const unsigned char* in
, int *inlen
) {
3959 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_15
);
3961 static int UTF8ToISO8859_15 (unsigned char* out
, int *outlen
,
3962 const unsigned char* in
, int *inlen
) {
3963 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_15
);
3966 static int ISO8859_16ToUTF8 (unsigned char* out
, int *outlen
,
3967 const unsigned char* in
, int *inlen
) {
3968 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_16
);
3970 static int UTF8ToISO8859_16 (unsigned char* out
, int *outlen
,
3971 const unsigned char* in
, int *inlen
) {
3972 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_16
);
3976 xmlRegisterCharEncodingHandlersISO8859x (void) {
3977 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8
, UTF8ToISO8859_2
);
3978 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8
, UTF8ToISO8859_3
);
3979 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8
, UTF8ToISO8859_4
);
3980 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8
, UTF8ToISO8859_5
);
3981 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8
, UTF8ToISO8859_6
);
3982 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8
, UTF8ToISO8859_7
);
3983 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8
, UTF8ToISO8859_8
);
3984 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8
, UTF8ToISO8859_9
);
3985 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8
, UTF8ToISO8859_10
);
3986 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8
, UTF8ToISO8859_11
);
3987 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8
, UTF8ToISO8859_13
);
3988 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8
, UTF8ToISO8859_14
);
3989 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8
, UTF8ToISO8859_15
);
3990 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8
, UTF8ToISO8859_16
);