2 * encoding.c : implements the encoding conversion functions needed for XML
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
31 #ifdef LIBXML_ICONV_ENABLED
35 #include <libxml/encoding.h>
36 #include <libxml/xmlmemory.h>
37 #ifdef LIBXML_HTML_ENABLED
38 #include <libxml/HTMLparser.h>
40 #include <libxml/globals.h>
41 #include <libxml/xmlerror.h>
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
47 #ifdef LIBXML_ICU_ENABLED
48 #include <unicode/ucnv.h>
49 /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50 #define ICU_PIVOT_BUF_SIZE 1024
51 typedef struct _uconv_t uconv_t
;
53 UConverter
*uconv
; /* for conversion between an encoding and UTF-16 */
54 UConverter
*utf8
; /* for conversion between UTF-8 and UTF-16 */
55 UChar pivot_buf
[ICU_PIVOT_BUF_SIZE
];
61 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias
;
62 typedef xmlCharEncodingAlias
*xmlCharEncodingAliasPtr
;
63 struct _xmlCharEncodingAlias
{
68 static xmlCharEncodingAliasPtr xmlCharEncodingAliases
= NULL
;
69 static int xmlCharEncodingAliasesNb
= 0;
70 static int xmlCharEncodingAliasesMax
= 0;
72 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
74 #define DEBUG_ENCODING /* Define this to get encoding traces */
79 static int xmlLittleEndian
= 1;
82 * xmlEncodingErrMemory:
83 * @extra: extra information
85 * Handle an out of memory condition
88 xmlEncodingErrMemory(const char *extra
)
90 __xmlSimpleError(XML_FROM_I18N
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
95 * @error: the error number
96 * @msg: the error message
100 static void LIBXML_ATTR_FORMAT(2,0)
101 xmlEncodingErr(xmlParserErrors error
, const char *msg
, const char *val
)
103 __xmlRaiseError(NULL
, NULL
, NULL
, NULL
, NULL
,
104 XML_FROM_I18N
, error
, XML_ERR_FATAL
,
105 NULL
, 0, val
, NULL
, NULL
, 0, 0, msg
, val
);
108 #ifdef LIBXML_ICU_ENABLED
110 openIcuConverter(const char* name
, int toUnicode
)
112 UErrorCode status
= U_ZERO_ERROR
;
113 uconv_t
*conv
= (uconv_t
*) xmlMalloc(sizeof(uconv_t
));
117 conv
->pivot_source
= conv
->pivot_buf
;
118 conv
->pivot_target
= conv
->pivot_buf
;
120 conv
->uconv
= ucnv_open(name
, &status
);
121 if (U_FAILURE(status
))
124 status
= U_ZERO_ERROR
;
126 ucnv_setToUCallBack(conv
->uconv
, UCNV_TO_U_CALLBACK_STOP
,
127 NULL
, NULL
, NULL
, &status
);
130 ucnv_setFromUCallBack(conv
->uconv
, UCNV_FROM_U_CALLBACK_STOP
,
131 NULL
, NULL
, NULL
, &status
);
133 if (U_FAILURE(status
))
136 status
= U_ZERO_ERROR
;
137 conv
->utf8
= ucnv_open("UTF-8", &status
);
138 if (U_SUCCESS(status
))
143 ucnv_close(conv
->uconv
);
149 closeIcuConverter(uconv_t
*conv
)
152 ucnv_close(conv
->uconv
);
153 ucnv_close(conv
->utf8
);
157 #endif /* LIBXML_ICU_ENABLED */
159 /************************************************************************
161 * Conversions To/From UTF8 encoding *
163 ************************************************************************/
167 * @out: a pointer to an array of bytes to store the result
168 * @outlen: the length of @out
169 * @in: a pointer to an array of ASCII chars
170 * @inlen: the length of @in
172 * Take a block of ASCII chars in and try to convert it to an UTF-8
173 * block of chars out.
174 * Returns 0 if success, or -1 otherwise
175 * The value of @inlen after return is the number of octets consumed
176 * if the return value is positive, else unpredictable.
177 * The value of @outlen after return is the number of octets produced.
180 asciiToUTF8(unsigned char* out
, int *outlen
,
181 const unsigned char* in
, int *inlen
) {
182 unsigned char* outstart
= out
;
183 const unsigned char* base
= in
;
184 const unsigned char* processed
= in
;
185 unsigned char* outend
= out
+ *outlen
;
186 const unsigned char* inend
;
189 inend
= in
+ (*inlen
);
190 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
198 *outlen
= out
- outstart
;
199 *inlen
= processed
- base
;
203 processed
= (const unsigned char*) in
;
205 *outlen
= out
- outstart
;
206 *inlen
= processed
- base
;
210 #ifdef LIBXML_OUTPUT_ENABLED
213 * @out: a pointer to an array of bytes to store the result
214 * @outlen: the length of @out
215 * @in: a pointer to an array of UTF-8 chars
216 * @inlen: the length of @in
218 * Take a block of UTF-8 chars in and try to convert it to an ASCII
219 * block of chars out.
221 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
222 * The value of @inlen after return is the number of octets consumed
223 * if the return value is positive, else unpredictable.
224 * The value of @outlen after return is the number of octets produced.
227 UTF8Toascii(unsigned char* out
, int *outlen
,
228 const unsigned char* in
, int *inlen
) {
229 const unsigned char* processed
= in
;
230 const unsigned char* outend
;
231 const unsigned char* outstart
= out
;
232 const unsigned char* instart
= in
;
233 const unsigned char* inend
;
237 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
240 * initialization nothing to do
246 inend
= in
+ (*inlen
);
247 outend
= out
+ (*outlen
);
250 if (d
< 0x80) { c
= d
; trailing
= 0; }
252 /* trailing byte in leading position */
253 *outlen
= out
- outstart
;
254 *inlen
= processed
- instart
;
256 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
257 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
258 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
260 /* no chance for this in Ascii */
261 *outlen
= out
- outstart
;
262 *inlen
= processed
- instart
;
266 if (inend
- in
< trailing
) {
270 for ( ; trailing
; trailing
--) {
271 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80))
277 /* assertion: c is a single UCS-4 value */
283 /* no chance for this in Ascii */
284 *outlen
= out
- outstart
;
285 *inlen
= processed
- instart
;
290 *outlen
= out
- outstart
;
291 *inlen
= processed
- instart
;
294 #endif /* LIBXML_OUTPUT_ENABLED */
298 * @out: a pointer to an array of bytes to store the result
299 * @outlen: the length of @out
300 * @in: a pointer to an array of ISO Latin 1 chars
301 * @inlen: the length of @in
303 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
304 * block of chars out.
305 * Returns the number of bytes written if success, or -1 otherwise
306 * The value of @inlen after return is the number of octets consumed
307 * if the return value is positive, else unpredictable.
308 * The value of @outlen after return is the number of octets produced.
311 isolat1ToUTF8(unsigned char* out
, int *outlen
,
312 const unsigned char* in
, int *inlen
) {
313 unsigned char* outstart
= out
;
314 const unsigned char* base
= in
;
315 unsigned char* outend
;
316 const unsigned char* inend
;
317 const unsigned char* instop
;
319 if ((out
== NULL
) || (in
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
))
322 outend
= out
+ *outlen
;
323 inend
= in
+ (*inlen
);
326 while ((in
< inend
) && (out
< outend
- 1)) {
328 *out
++ = (((*in
) >> 6) & 0x1F) | 0xC0;
329 *out
++ = ((*in
) & 0x3F) | 0x80;
332 if ((instop
- in
) > (outend
- out
)) instop
= in
+ (outend
- out
);
333 while ((in
< instop
) && (*in
< 0x80)) {
337 if ((in
< inend
) && (out
< outend
) && (*in
< 0x80)) {
340 *outlen
= out
- outstart
;
347 * @out: a pointer to an array of bytes to store the result
348 * @outlen: the length of @out
349 * @inb: a pointer to an array of UTF-8 chars
350 * @inlenb: the length of @inb in bytes
352 * No op copy operation for UTF8 handling.
354 * Returns the number of bytes written, or -1 if lack of space.
355 * The value of *inlen after return is the number of octets consumed
356 * if the return value is positive, else unpredictable.
359 UTF8ToUTF8(unsigned char* out
, int *outlen
,
360 const unsigned char* inb
, int *inlenb
)
364 if ((out
== NULL
) || (outlen
== NULL
) || (inlenb
== NULL
))
367 /* inb == NULL means output is initialized. */
372 if (*outlen
> *inlenb
) {
381 * FIXME: Conversion functions must assure valid UTF-8, so we have
382 * to check for UTF-8 validity. Preferably, this converter shouldn't
385 memcpy(out
, inb
, len
);
393 #ifdef LIBXML_OUTPUT_ENABLED
396 * @out: a pointer to an array of bytes to store the result
397 * @outlen: the length of @out
398 * @in: a pointer to an array of UTF-8 chars
399 * @inlen: the length of @in
401 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
402 * block of chars out.
404 * Returns the number of bytes written if success, -2 if the transcoding fails,
406 * The value of @inlen after return is the number of octets consumed
407 * if the return value is positive, else unpredictable.
408 * The value of @outlen after return is the number of octets produced.
411 UTF8Toisolat1(unsigned char* out
, int *outlen
,
412 const unsigned char* in
, int *inlen
) {
413 const unsigned char* processed
= in
;
414 const unsigned char* outend
;
415 const unsigned char* outstart
= out
;
416 const unsigned char* instart
= in
;
417 const unsigned char* inend
;
421 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
424 * initialization nothing to do
430 inend
= in
+ (*inlen
);
431 outend
= out
+ (*outlen
);
434 if (d
< 0x80) { c
= d
; trailing
= 0; }
436 /* trailing byte in leading position */
437 *outlen
= out
- outstart
;
438 *inlen
= processed
- instart
;
440 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
441 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
442 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
444 /* no chance for this in IsoLat1 */
445 *outlen
= out
- outstart
;
446 *inlen
= processed
- instart
;
450 if (inend
- in
< trailing
) {
454 for ( ; trailing
; trailing
--) {
457 if (((d
= *in
++) & 0xC0) != 0x80) {
458 *outlen
= out
- outstart
;
459 *inlen
= processed
- instart
;
466 /* assertion: c is a single UCS-4 value */
472 /* no chance for this in IsoLat1 */
473 *outlen
= out
- outstart
;
474 *inlen
= processed
- instart
;
479 *outlen
= out
- outstart
;
480 *inlen
= processed
- instart
;
483 #endif /* LIBXML_OUTPUT_ENABLED */
487 * @out: a pointer to an array of bytes to store the result
488 * @outlen: the length of @out
489 * @inb: a pointer to an array of UTF-16LE passed as a byte array
490 * @inlenb: the length of @inb in bytes
492 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
493 * block of chars out. This function assumes the endian property
494 * is the same between the native type of this machine and the
497 * Returns the number of bytes written, or -1 if lack of space, or -2
498 * if the transcoding fails (if *in is not a valid utf16 string)
499 * The value of *inlenb after return is the number of octets consumed
500 * if the return value is positive, else unpredictable.
503 UTF16LEToUTF8(unsigned char* out
, int *outlen
,
504 const unsigned char* inb
, int *inlenb
)
506 unsigned char* outstart
= out
;
507 const unsigned char* processed
= inb
;
508 unsigned char* outend
;
509 unsigned short* in
= (unsigned short*) inb
;
510 unsigned short* inend
;
511 unsigned int c
, d
, inlen
;
519 outend
= out
+ *outlen
;
520 if ((*inlenb
% 2) == 1)
524 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
525 if (xmlLittleEndian
) {
528 tmp
= (unsigned char *) in
;
533 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
534 if (in
>= inend
) { /* handle split multi-byte characters */
537 if (xmlLittleEndian
) {
540 tmp
= (unsigned char *) in
;
545 if ((d
& 0xFC00) == 0xDC00) {
552 *outlen
= out
- outstart
;
553 *inlenb
= processed
- inb
;
558 /* assertion: c is a single UCS-4 value */
561 if (c
< 0x80) { *out
++= c
; bits
= -6; }
562 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
563 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
564 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
566 for ( ; bits
>= 0; bits
-= 6) {
569 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
571 processed
= (const unsigned char*) in
;
573 *outlen
= out
- outstart
;
574 *inlenb
= processed
- inb
;
578 #ifdef LIBXML_OUTPUT_ENABLED
581 * @outb: a pointer to an array of bytes to store the result
582 * @outlen: the length of @outb
583 * @in: a pointer to an array of UTF-8 chars
584 * @inlen: the length of @in
586 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
587 * block of chars out.
589 * Returns the number of bytes written, or -1 if lack of space, or -2
590 * if the transcoding failed.
593 UTF8ToUTF16LE(unsigned char* outb
, int *outlen
,
594 const unsigned char* in
, int *inlen
)
596 unsigned short* out
= (unsigned short*) outb
;
597 const unsigned char* processed
= in
;
598 const unsigned char *const instart
= in
;
599 unsigned short* outstart
= out
;
600 unsigned short* outend
;
601 const unsigned char* inend
;
605 unsigned short tmp1
, tmp2
;
607 /* UTF16LE encoding has no BOM */
608 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
615 outend
= out
+ (*outlen
/ 2);
618 if (d
< 0x80) { c
= d
; trailing
= 0; }
620 /* trailing byte in leading position */
621 *outlen
= (out
- outstart
) * 2;
622 *inlen
= processed
- instart
;
624 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
625 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
626 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
628 /* no chance for this in UTF-16 */
629 *outlen
= (out
- outstart
) * 2;
630 *inlen
= processed
- instart
;
634 if (inend
- in
< trailing
) {
638 for ( ; trailing
; trailing
--) {
639 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80))
645 /* assertion: c is a single UCS-4 value */
649 if (xmlLittleEndian
) {
652 tmp
= (unsigned char *) out
;
653 *tmp
= (unsigned char) c
; /* Explicit truncation */
654 *(tmp
+ 1) = c
>> 8 ;
658 else if (c
< 0x110000) {
662 if (xmlLittleEndian
) {
663 *out
++ = 0xD800 | (c
>> 10);
664 *out
++ = 0xDC00 | (c
& 0x03FF);
666 tmp1
= 0xD800 | (c
>> 10);
667 tmp
= (unsigned char *) out
;
668 *tmp
= (unsigned char) tmp1
; /* Explicit truncation */
669 *(tmp
+ 1) = tmp1
>> 8;
672 tmp2
= 0xDC00 | (c
& 0x03FF);
673 tmp
= (unsigned char *) out
;
674 *tmp
= (unsigned char) tmp2
; /* Explicit truncation */
675 *(tmp
+ 1) = tmp2
>> 8;
683 *outlen
= (out
- outstart
) * 2;
684 *inlen
= processed
- instart
;
690 * @outb: a pointer to an array of bytes to store the result
691 * @outlen: the length of @outb
692 * @in: a pointer to an array of UTF-8 chars
693 * @inlen: the length of @in
695 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
696 * block of chars out.
698 * Returns the number of bytes written, or -1 if lack of space, or -2
699 * if the transcoding failed.
702 UTF8ToUTF16(unsigned char* outb
, int *outlen
,
703 const unsigned char* in
, int *inlen
)
707 * initialization, add the Byte Order Mark for UTF-16LE
714 #ifdef DEBUG_ENCODING
715 xmlGenericError(xmlGenericErrorContext
,
716 "Added FFFE Byte Order Mark\n");
724 return (UTF8ToUTF16LE(outb
, outlen
, in
, inlen
));
726 #endif /* LIBXML_OUTPUT_ENABLED */
730 * @out: a pointer to an array of bytes to store the result
731 * @outlen: the length of @out
732 * @inb: a pointer to an array of UTF-16 passed as a byte array
733 * @inlenb: the length of @inb in bytes
735 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
736 * block of chars out. This function assumes the endian property
737 * is the same between the native type of this machine and the
740 * Returns the number of bytes written, or -1 if lack of space, or -2
741 * if the transcoding fails (if *in is not a valid utf16 string)
742 * The value of *inlenb after return is the number of octets consumed
743 * if the return value is positive, else unpredictable.
746 UTF16BEToUTF8(unsigned char* out
, int *outlen
,
747 const unsigned char* inb
, int *inlenb
)
749 unsigned char* outstart
= out
;
750 const unsigned char* processed
= inb
;
751 unsigned char* outend
;
752 unsigned short* in
= (unsigned short*) inb
;
753 unsigned short* inend
;
754 unsigned int c
, d
, inlen
;
762 outend
= out
+ *outlen
;
763 if ((*inlenb
% 2) == 1)
767 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
768 if (xmlLittleEndian
) {
769 tmp
= (unsigned char *) in
;
776 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
777 if (in
>= inend
) { /* handle split multi-byte characters */
780 if (xmlLittleEndian
) {
781 tmp
= (unsigned char *) in
;
788 if ((d
& 0xFC00) == 0xDC00) {
795 *outlen
= out
- outstart
;
796 *inlenb
= processed
- inb
;
801 /* assertion: c is a single UCS-4 value */
804 if (c
< 0x80) { *out
++= c
; bits
= -6; }
805 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
806 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
807 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
809 for ( ; bits
>= 0; bits
-= 6) {
812 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
814 processed
= (const unsigned char*) in
;
816 *outlen
= out
- outstart
;
817 *inlenb
= processed
- inb
;
821 #ifdef LIBXML_OUTPUT_ENABLED
824 * @outb: a pointer to an array of bytes to store the result
825 * @outlen: the length of @outb
826 * @in: a pointer to an array of UTF-8 chars
827 * @inlen: the length of @in
829 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
830 * block of chars out.
832 * Returns the number of bytes written, or -1 if lack of space, or -2
833 * if the transcoding failed.
836 UTF8ToUTF16BE(unsigned char* outb
, int *outlen
,
837 const unsigned char* in
, int *inlen
)
839 unsigned short* out
= (unsigned short*) outb
;
840 const unsigned char* processed
= in
;
841 const unsigned char *const instart
= in
;
842 unsigned short* outstart
= out
;
843 unsigned short* outend
;
844 const unsigned char* inend
;
848 unsigned short tmp1
, tmp2
;
850 /* UTF-16BE has no BOM */
851 if ((outb
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
858 outend
= out
+ (*outlen
/ 2);
861 if (d
< 0x80) { c
= d
; trailing
= 0; }
863 /* trailing byte in leading position */
864 *outlen
= out
- outstart
;
865 *inlen
= processed
- instart
;
867 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
868 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
869 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
871 /* no chance for this in UTF-16 */
872 *outlen
= out
- outstart
;
873 *inlen
= processed
- instart
;
877 if (inend
- in
< trailing
) {
881 for ( ; trailing
; trailing
--) {
882 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80)) break;
887 /* assertion: c is a single UCS-4 value */
889 if (out
>= outend
) break;
890 if (xmlLittleEndian
) {
891 tmp
= (unsigned char *) out
;
893 *(tmp
+ 1) = (unsigned char) c
; /* Explicit truncation */
899 else if (c
< 0x110000) {
900 if (out
+1 >= outend
) break;
902 if (xmlLittleEndian
) {
903 tmp1
= 0xD800 | (c
>> 10);
904 tmp
= (unsigned char *) out
;
906 *(tmp
+ 1) = (unsigned char) tmp1
; /* Explicit truncation */
909 tmp2
= 0xDC00 | (c
& 0x03FF);
910 tmp
= (unsigned char *) out
;
912 *(tmp
+ 1) = (unsigned char) tmp2
; /* Explicit truncation */
915 *out
++ = 0xD800 | (c
>> 10);
916 *out
++ = 0xDC00 | (c
& 0x03FF);
923 *outlen
= (out
- outstart
) * 2;
924 *inlen
= processed
- instart
;
927 #endif /* LIBXML_OUTPUT_ENABLED */
929 /************************************************************************
931 * Generic encoding handling routines *
933 ************************************************************************/
936 * xmlDetectCharEncoding:
937 * @in: a pointer to the first bytes of the XML entity, must be at least
938 * 2 bytes long (at least 4 if encoding is a UCS4 variant).
939 * @len: the length of the buffer
941 * Guess the encoding of the entity using the first bytes of the entity content
942 * according to the non-normative appendix F of the XML-1.0 recommendation.
944 * Returns one of the XML_CHAR_ENCODING_... values.
947 xmlDetectCharEncoding(const unsigned char* in
, int len
)
950 return(XML_CHAR_ENCODING_NONE
);
952 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
953 (in
[2] == 0x00) && (in
[3] == 0x3C))
954 return(XML_CHAR_ENCODING_UCS4BE
);
955 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
956 (in
[2] == 0x00) && (in
[3] == 0x00))
957 return(XML_CHAR_ENCODING_UCS4LE
);
958 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
959 (in
[2] == 0x3C) && (in
[3] == 0x00))
960 return(XML_CHAR_ENCODING_UCS4_2143
);
961 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
962 (in
[2] == 0x00) && (in
[3] == 0x00))
963 return(XML_CHAR_ENCODING_UCS4_3412
);
964 if ((in
[0] == 0x4C) && (in
[1] == 0x6F) &&
965 (in
[2] == 0xA7) && (in
[3] == 0x94))
966 return(XML_CHAR_ENCODING_EBCDIC
);
967 if ((in
[0] == 0x3C) && (in
[1] == 0x3F) &&
968 (in
[2] == 0x78) && (in
[3] == 0x6D))
969 return(XML_CHAR_ENCODING_UTF8
);
971 * Although not part of the recommendation, we also
972 * attempt an "auto-recognition" of UTF-16LE and
973 * UTF-16BE encodings.
975 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
976 (in
[2] == 0x3F) && (in
[3] == 0x00))
977 return(XML_CHAR_ENCODING_UTF16LE
);
978 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
979 (in
[2] == 0x00) && (in
[3] == 0x3F))
980 return(XML_CHAR_ENCODING_UTF16BE
);
984 * Errata on XML-1.0 June 20 2001
985 * We now allow an UTF8 encoded BOM
987 if ((in
[0] == 0xEF) && (in
[1] == 0xBB) &&
989 return(XML_CHAR_ENCODING_UTF8
);
991 /* For UTF-16 we can recognize by the BOM */
993 if ((in
[0] == 0xFE) && (in
[1] == 0xFF))
994 return(XML_CHAR_ENCODING_UTF16BE
);
995 if ((in
[0] == 0xFF) && (in
[1] == 0xFE))
996 return(XML_CHAR_ENCODING_UTF16LE
);
998 return(XML_CHAR_ENCODING_NONE
);
1002 * xmlCleanupEncodingAliases:
1004 * Unregisters all aliases
1007 xmlCleanupEncodingAliases(void) {
1010 if (xmlCharEncodingAliases
== NULL
)
1013 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1014 if (xmlCharEncodingAliases
[i
].name
!= NULL
)
1015 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1016 if (xmlCharEncodingAliases
[i
].alias
!= NULL
)
1017 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
1019 xmlCharEncodingAliasesNb
= 0;
1020 xmlCharEncodingAliasesMax
= 0;
1021 xmlFree(xmlCharEncodingAliases
);
1022 xmlCharEncodingAliases
= NULL
;
1026 * xmlGetEncodingAlias:
1027 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1029 * Lookup an encoding name for the given alias.
1031 * Returns NULL if not found, otherwise the original name
1034 xmlGetEncodingAlias(const char *alias
) {
1041 if (xmlCharEncodingAliases
== NULL
)
1044 for (i
= 0;i
< 99;i
++) {
1045 upper
[i
] = (char) toupper((unsigned char) alias
[i
]);
1046 if (upper
[i
] == 0) break;
1051 * Walk down the list looking for a definition of the alias
1053 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1054 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1055 return(xmlCharEncodingAliases
[i
].name
);
1062 * xmlAddEncodingAlias:
1063 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1064 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1066 * Registers an alias @alias for an encoding named @name. Existing alias
1067 * will be overwritten.
1069 * Returns 0 in case of success, -1 in case of error
1072 xmlAddEncodingAlias(const char *name
, const char *alias
) {
1076 if ((name
== NULL
) || (alias
== NULL
))
1079 for (i
= 0;i
< 99;i
++) {
1080 upper
[i
] = (char) toupper((unsigned char) alias
[i
]);
1081 if (upper
[i
] == 0) break;
1085 if (xmlCharEncodingAliases
== NULL
) {
1086 xmlCharEncodingAliasesNb
= 0;
1087 xmlCharEncodingAliasesMax
= 20;
1088 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1089 xmlMalloc(xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1090 if (xmlCharEncodingAliases
== NULL
)
1092 } else if (xmlCharEncodingAliasesNb
>= xmlCharEncodingAliasesMax
) {
1093 xmlCharEncodingAliasesMax
*= 2;
1094 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1095 xmlRealloc(xmlCharEncodingAliases
,
1096 xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1099 * Walk down the list looking for a definition of the alias
1101 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1102 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1104 * Replace the definition.
1106 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1107 xmlCharEncodingAliases
[i
].name
= xmlMemStrdup(name
);
1112 * Add the definition
1114 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].name
= xmlMemStrdup(name
);
1115 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].alias
= xmlMemStrdup(upper
);
1116 xmlCharEncodingAliasesNb
++;
1121 * xmlDelEncodingAlias:
1122 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1124 * Unregisters an encoding alias @alias
1126 * Returns 0 in case of success, -1 in case of error
1129 xmlDelEncodingAlias(const char *alias
) {
1135 if (xmlCharEncodingAliases
== NULL
)
1138 * Walk down the list looking for a definition of the alias
1140 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1141 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, alias
)) {
1142 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1143 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
1144 xmlCharEncodingAliasesNb
--;
1145 memmove(&xmlCharEncodingAliases
[i
], &xmlCharEncodingAliases
[i
+ 1],
1146 sizeof(xmlCharEncodingAlias
) * (xmlCharEncodingAliasesNb
- i
));
1154 * xmlParseCharEncoding:
1155 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1157 * Compare the string to the encoding schemes already known. Note
1158 * that the comparison is case insensitive according to section
1159 * [XML] 4.3.3 Character Encoding in Entities.
1161 * Returns one of the XML_CHAR_ENCODING_... values, XML_CHAR_ENCODING_NONE
1162 * if @name is empty, or XML_CHAR_ENCODING_ERROR if not recognized.
1165 xmlParseCharEncoding(const char* name
)
1172 return(XML_CHAR_ENCODING_NONE
);
1175 * Do the alias resolution
1177 alias
= xmlGetEncodingAlias(name
);
1181 for (i
= 0;i
< 499;i
++) {
1182 upper
[i
] = (char) toupper((unsigned char) name
[i
]);
1183 if (upper
[i
] == 0) break;
1187 if (!strcmp(upper
, "")) return(XML_CHAR_ENCODING_NONE
);
1188 if (!strcmp(upper
, "UTF-8")) return(XML_CHAR_ENCODING_UTF8
);
1189 if (!strcmp(upper
, "UTF8")) return(XML_CHAR_ENCODING_UTF8
);
1192 * NOTE: if we were able to parse this, the endianness of UTF16 is
1193 * already found and in use
1195 if (!strcmp(upper
, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE
);
1196 if (!strcmp(upper
, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE
);
1198 if (!strcmp(upper
, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1199 if (!strcmp(upper
, "UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1200 if (!strcmp(upper
, "UCS2")) return(XML_CHAR_ENCODING_UCS2
);
1203 * NOTE: if we were able to parse this, the endianness of UCS4 is
1204 * already found and in use
1206 if (!strcmp(upper
, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1207 if (!strcmp(upper
, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1208 if (!strcmp(upper
, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE
);
1211 if (!strcmp(upper
, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1
);
1212 if (!strcmp(upper
, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1
);
1213 if (!strcmp(upper
, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1
);
1215 if (!strcmp(upper
, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2
);
1216 if (!strcmp(upper
, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2
);
1217 if (!strcmp(upper
, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2
);
1219 if (!strcmp(upper
, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3
);
1220 if (!strcmp(upper
, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4
);
1221 if (!strcmp(upper
, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5
);
1222 if (!strcmp(upper
, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6
);
1223 if (!strcmp(upper
, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7
);
1224 if (!strcmp(upper
, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8
);
1225 if (!strcmp(upper
, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9
);
1227 if (!strcmp(upper
, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP
);
1228 if (!strcmp(upper
, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS
);
1229 if (!strcmp(upper
, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP
);
1231 #ifdef DEBUG_ENCODING
1232 xmlGenericError(xmlGenericErrorContext
, "Unknown encoding %s\n", name
);
1234 return(XML_CHAR_ENCODING_ERROR
);
1238 * xmlGetCharEncodingName:
1239 * @enc: the encoding
1241 * The "canonical" name for XML encoding.
1242 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243 * Section 4.3.3 Character Encoding in Entities
1245 * Returns the canonical name for the given encoding
1249 xmlGetCharEncodingName(xmlCharEncoding enc
) {
1251 case XML_CHAR_ENCODING_ERROR
:
1253 case XML_CHAR_ENCODING_NONE
:
1255 case XML_CHAR_ENCODING_UTF8
:
1257 case XML_CHAR_ENCODING_UTF16LE
:
1259 case XML_CHAR_ENCODING_UTF16BE
:
1261 case XML_CHAR_ENCODING_EBCDIC
:
1263 case XML_CHAR_ENCODING_UCS4LE
:
1264 return("ISO-10646-UCS-4");
1265 case XML_CHAR_ENCODING_UCS4BE
:
1266 return("ISO-10646-UCS-4");
1267 case XML_CHAR_ENCODING_UCS4_2143
:
1268 return("ISO-10646-UCS-4");
1269 case XML_CHAR_ENCODING_UCS4_3412
:
1270 return("ISO-10646-UCS-4");
1271 case XML_CHAR_ENCODING_UCS2
:
1272 return("ISO-10646-UCS-2");
1273 case XML_CHAR_ENCODING_8859_1
:
1274 return("ISO-8859-1");
1275 case XML_CHAR_ENCODING_8859_2
:
1276 return("ISO-8859-2");
1277 case XML_CHAR_ENCODING_8859_3
:
1278 return("ISO-8859-3");
1279 case XML_CHAR_ENCODING_8859_4
:
1280 return("ISO-8859-4");
1281 case XML_CHAR_ENCODING_8859_5
:
1282 return("ISO-8859-5");
1283 case XML_CHAR_ENCODING_8859_6
:
1284 return("ISO-8859-6");
1285 case XML_CHAR_ENCODING_8859_7
:
1286 return("ISO-8859-7");
1287 case XML_CHAR_ENCODING_8859_8
:
1288 return("ISO-8859-8");
1289 case XML_CHAR_ENCODING_8859_9
:
1290 return("ISO-8859-9");
1291 case XML_CHAR_ENCODING_2022_JP
:
1292 return("ISO-2022-JP");
1293 case XML_CHAR_ENCODING_SHIFT_JIS
:
1294 return("Shift-JIS");
1295 case XML_CHAR_ENCODING_EUC_JP
:
1297 case XML_CHAR_ENCODING_ASCII
:
1303 /************************************************************************
1305 * Char encoding handlers *
1307 ************************************************************************/
1309 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1310 defined(LIBXML_ISO8859X_ENABLED)
1312 #define DECLARE_ISO_FUNCS(n) \
1313 static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
1314 const unsigned char* in, int *inlen); \
1315 static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
1316 const unsigned char* in, int *inlen);
1319 DECLARE_ISO_FUNCS(2)
1320 DECLARE_ISO_FUNCS(3)
1321 DECLARE_ISO_FUNCS(4)
1322 DECLARE_ISO_FUNCS(5)
1323 DECLARE_ISO_FUNCS(6)
1324 DECLARE_ISO_FUNCS(7)
1325 DECLARE_ISO_FUNCS(8)
1326 DECLARE_ISO_FUNCS(9)
1327 DECLARE_ISO_FUNCS(10)
1328 DECLARE_ISO_FUNCS(11)
1329 DECLARE_ISO_FUNCS(13)
1330 DECLARE_ISO_FUNCS(14)
1331 DECLARE_ISO_FUNCS(15)
1332 DECLARE_ISO_FUNCS(16)
1335 #endif /* LIBXML_ISO8859X_ENABLED */
1337 #ifdef LIBXML_ICONV_ENABLED
1338 #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1343 #ifdef LIBXML_ICU_ENABLED
1344 #define EMPTY_UCONV , NULL, NULL
1349 #define MAKE_HANDLER(name, in, out) \
1350 { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
/*
 * Table of the statically defined encoding handlers.
 * The order of the first entries matters: xmlUTF16LEHandler and
 * xmlUTF16BEHandler point at indexes 1 and 2, so "UTF-16LE" and "UTF-16BE"
 * must stay directly after "UTF-8" in both preprocessor branches.
 */
1352 static const xmlCharEncodingHandler defaultHandlers
[] = {
1353 MAKE_HANDLER("UTF-8", UTF8ToUTF8
, UTF8ToUTF8
)
/* With output support each entry gets both an input and an output converter. */
1354 #ifdef LIBXML_OUTPUT_ENABLED
1355 ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8
, UTF8ToUTF16LE
)
1356 ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8
, UTF8ToUTF16BE
)
1357 ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8
, UTF8ToUTF16
)
1358 ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8
, UTF8Toisolat1
)
1359 ,MAKE_HANDLER("ASCII", asciiToUTF8
, UTF8Toascii
)
1360 ,MAKE_HANDLER("US-ASCII", asciiToUTF8
, UTF8Toascii
)
/* The "HTML" entry is output-only: its input converter is NULL. */
1361 #ifdef LIBXML_HTML_ENABLED
1362 ,MAKE_HANDLER("HTML", NULL
, UTF8ToHtml
)
/* Entries below carry a NULL output converter (input-only variants). */
1365 ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8
, NULL
)
1366 ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8
, NULL
)
1367 ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8
, NULL
)
1368 ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8
, NULL
)
1369 ,MAKE_HANDLER("ASCII", asciiToUTF8
, NULL
)
1370 ,MAKE_HANDLER("US-ASCII", asciiToUTF8
, NULL
)
1371 #endif /* LIBXML_OUTPUT_ENABLED */
/* Built-in ISO-8859-x converters, only without iconv and ICU. */
1373 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374 defined(LIBXML_ISO8859X_ENABLED)
1375 ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8
, UTF8ToISO8859_2
)
1376 ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8
, UTF8ToISO8859_3
)
1377 ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8
, UTF8ToISO8859_4
)
1378 ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8
, UTF8ToISO8859_5
)
1379 ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8
, UTF8ToISO8859_6
)
1380 ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8
, UTF8ToISO8859_7
)
1381 ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8
, UTF8ToISO8859_8
)
1382 ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8
, UTF8ToISO8859_9
)
1383 ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8
, UTF8ToISO8859_10
)
1384 ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8
, UTF8ToISO8859_11
)
1385 ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8
, UTF8ToISO8859_13
)
1386 ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8
, UTF8ToISO8859_14
)
1387 ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8
, UTF8ToISO8859_15
)
1388 ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8
, UTF8ToISO8859_16
)
/* Number of entries in the static defaultHandlers table. */
1392 #define NUM_DEFAULT_HANDLERS \
1393 (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
/*
 * Direct pointers to the UTF-16LE and UTF-16BE entries; the indexes 1 and 2
 * depend on the ordering of defaultHandlers.
 */
1395 static const xmlCharEncodingHandler
*xmlUTF16LEHandler
= &defaultHandlers
[1];
1396 static const xmlCharEncodingHandler
*xmlUTF16BEHandler
= &defaultHandlers
[2];
1398 /* the size should be growable, but it's not a big deal ... */
1399 #define MAX_ENCODING_HANDLERS 50
/*
 * Table of dynamically registered handlers and its current fill count.
 * The table pointer starts out NULL and is allocated by
 * xmlRegisterCharEncodingHandler with room for MAX_ENCODING_HANDLERS entries.
 */
1400 static xmlCharEncodingHandlerPtr
*handlers
= NULL
;
1401 static int nbCharEncodingHandler
= 0;
1404 * xmlNewCharEncodingHandler:
1405 * @name: the encoding name, in UTF-8 format (ASCII actually)
1406 * @input: the xmlCharEncodingInputFunc to read that encoding
1407 * @output: the xmlCharEncodingOutputFunc to write that encoding
1409 * Create and registers an xmlCharEncodingHandler.
1411 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1413 xmlCharEncodingHandlerPtr
1414 xmlNewCharEncodingHandler(const char *name
,
1415 xmlCharEncodingInputFunc input
,
1416 xmlCharEncodingOutputFunc output
) {
1417 xmlCharEncodingHandlerPtr handler
;
1424 * Do the alias resolution
1426 alias
= xmlGetEncodingAlias(name
);
1431 * Keep only the uppercase version of the encoding.
1434 xmlEncodingErr(XML_I18N_NO_NAME
,
1435 "xmlNewCharEncodingHandler : no name !\n", NULL
);
1438 for (i
= 0;i
< 499;i
++) {
1439 upper
[i
] = (char) toupper((unsigned char) name
[i
]);
1440 if (upper
[i
] == 0) break;
1443 up
= xmlMemStrdup(upper
);
1445 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1450 * allocate and fill-up an handler block.
1452 handler
= (xmlCharEncodingHandlerPtr
)
1453 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1454 if (handler
== NULL
) {
1456 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1459 memset(handler
, 0, sizeof(xmlCharEncodingHandler
));
1460 handler
->input
= input
;
1461 handler
->output
= output
;
1464 #ifdef LIBXML_ICONV_ENABLED
1465 handler
->iconv_in
= NULL
;
1466 handler
->iconv_out
= NULL
;
1468 #ifdef LIBXML_ICU_ENABLED
1469 handler
->uconv_in
= NULL
;
1470 handler
->uconv_out
= NULL
;
1474 * registers and returns the handler.
1476 xmlRegisterCharEncodingHandler(handler
);
1477 #ifdef DEBUG_ENCODING
1478 xmlGenericError(xmlGenericErrorContext
,
1479 "Registered encoding handler for %s\n", name
);
1485 * xmlInitCharEncodingHandlers:
1487 * DEPRECATED: Alias for xmlInitParser.
/* Deprecated public entry point kept for compatibility; see xmlInitParser. */
1490 xmlInitCharEncodingHandlers(void) {
1495 * xmlInitEncodingInternal:
1497 * Initialize the char encoding support.
/*
 * Detects the host byte order once and records it in xmlLittleEndian.
 * The 16-bit value 0x1234 is inspected through a byte pointer: a first byte
 * of 0x12 means big endian (xmlLittleEndian = 0), 0x34 means little endian
 * (xmlLittleEndian = 1). Any other value is reported as an internal error.
 */
1500 xmlInitEncodingInternal(void) {
1501 unsigned short int tst
= 0x1234;
1502 unsigned char *ptr
= (unsigned char *) &tst
;
1504 if (*ptr
== 0x12) xmlLittleEndian
= 0;
1505 else if (*ptr
== 0x34) xmlLittleEndian
= 1;
1507 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1508 "Odd problem at endianness detection\n", NULL
);
1513 * xmlCleanupCharEncodingHandlers:
1515 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516 * to free global state but see the warnings there. xmlCleanupParser
1517 * should be only called once at program exit. In most cases, you don't
1518 * have to call cleanup functions at all.
1520 * Cleanup the memory allocated for the char encoding support, it
1521 * unregisters all the encoding handlers and the aliases.
/*
 * Releases the encoding aliases and every dynamically registered handler.
 * Does nothing more when the handler table was never allocated.
 */
1524 xmlCleanupCharEncodingHandlers(void) {
1525 xmlCleanupEncodingAliases();
1527 if (handlers
== NULL
) return;
/* Walk the table from the last registered entry down to index 0. */
1529 for (;nbCharEncodingHandler
> 0;) {
1530 nbCharEncodingHandler
--;
1531 if (handlers
[nbCharEncodingHandler
] != NULL
) {
/* Each registered handler owns its name string. */
1532 if (handlers
[nbCharEncodingHandler
]->name
!= NULL
)
1533 xmlFree(handlers
[nbCharEncodingHandler
]->name
);
1534 xmlFree(handlers
[nbCharEncodingHandler
]);
1539 nbCharEncodingHandler
= 0;
1543 * xmlRegisterCharEncodingHandler:
1544 * @handler: the xmlCharEncodingHandlerPtr handler block
1546 * Register the char encoding handler, surprising, isn't it ?
/*
 * Appends @handler to the global table of registered handlers.
 * The table is allocated on first use with MAX_ENCODING_HANDLERS slots.
 * Errors are reported for a NULL handler, a failed table allocation and a
 * full table.
 * NOTE(review): the trailing cleanup lines free handler->name, which
 * suggests the handler is released on the failure paths; callers that keep
 * using the pointer afterwards should be checked -- confirm.
 */
1549 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler
) {
1550 if (handler
== NULL
) {
1551 xmlEncodingErr(XML_I18N_NO_HANDLER
,
1552 "xmlRegisterCharEncodingHandler: NULL handler\n", NULL
);
/* Lazily allocate the fixed-size table. */
1555 if (handlers
== NULL
) {
1556 handlers
= xmlMalloc(MAX_ENCODING_HANDLERS
* sizeof(handlers
[0]));
1557 if (handlers
== NULL
) {
1558 xmlEncodingErrMemory("allocating handler table");
/* The table never grows beyond MAX_ENCODING_HANDLERS entries. */
1563 if (nbCharEncodingHandler
>= MAX_ENCODING_HANDLERS
) {
1564 xmlEncodingErr(XML_I18N_EXCESS_HANDLER
,
1565 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566 "MAX_ENCODING_HANDLERS");
/* Success: store the handler and bump the count. */
1569 handlers
[nbCharEncodingHandler
++] = handler
;
/* Cleanup of a handler that could not be registered. */
1573 if (handler
!= NULL
) {
1574 if (handler
->name
!= NULL
) {
1575 xmlFree(handler
->name
);
1582 * xmlGetCharEncodingHandler:
1583 * @enc: an xmlCharEncoding value.
1585 * Search in the registered set the handler able to read/write that encoding.
1587 * Returns the handler or NULL if not found
/*
 * Maps an xmlCharEncoding value to an encoding handler.
 * UTF-16LE and UTF-16BE return the static default handlers directly. Most
 * other encodings are resolved by trying one or more spellings of the
 * encoding name through xmlFindCharEncodingHandler and returning the first
 * handler found.
 */
1589 xmlCharEncodingHandlerPtr
1590 xmlGetCharEncodingHandler(xmlCharEncoding enc
) {
1591 xmlCharEncodingHandlerPtr handler
;
1594 case XML_CHAR_ENCODING_ERROR
:
1596 case XML_CHAR_ENCODING_NONE
:
1598 case XML_CHAR_ENCODING_UTF8
:
/* The casts drop the const qualifier of the static table entries. */
1600 case XML_CHAR_ENCODING_UTF16LE
:
1601 return((xmlCharEncodingHandlerPtr
) xmlUTF16LEHandler
);
1602 case XML_CHAR_ENCODING_UTF16BE
:
1603 return((xmlCharEncodingHandlerPtr
) xmlUTF16BEHandler
);
/* EBCDIC: several names are tried in turn. */
1604 case XML_CHAR_ENCODING_EBCDIC
:
1605 handler
= xmlFindCharEncodingHandler("EBCDIC");
1606 if (handler
!= NULL
) return(handler
);
1607 handler
= xmlFindCharEncodingHandler("ebcdic");
1608 if (handler
!= NULL
) return(handler
);
1609 handler
= xmlFindCharEncodingHandler("EBCDIC-US");
1610 if (handler
!= NULL
) return(handler
);
1611 handler
= xmlFindCharEncodingHandler("IBM-037");
1612 if (handler
!= NULL
) return(handler
);
/* UCS4BE and UCS4LE look up the same three names. */
1614 case XML_CHAR_ENCODING_UCS4BE
:
1615 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616 if (handler
!= NULL
) return(handler
);
1617 handler
= xmlFindCharEncodingHandler("UCS-4");
1618 if (handler
!= NULL
) return(handler
);
1619 handler
= xmlFindCharEncodingHandler("UCS4");
1620 if (handler
!= NULL
) return(handler
);
1622 case XML_CHAR_ENCODING_UCS4LE
:
1623 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624 if (handler
!= NULL
) return(handler
);
1625 handler
= xmlFindCharEncodingHandler("UCS-4");
1626 if (handler
!= NULL
) return(handler
);
1627 handler
= xmlFindCharEncodingHandler("UCS4");
1628 if (handler
!= NULL
) return(handler
);
/* No lookup is visible for the two unusual UCS4 byte orders. */
1630 case XML_CHAR_ENCODING_UCS4_2143
:
1632 case XML_CHAR_ENCODING_UCS4_3412
:
1634 case XML_CHAR_ENCODING_UCS2
:
1635 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636 if (handler
!= NULL
) return(handler
);
1637 handler
= xmlFindCharEncodingHandler("UCS-2");
1638 if (handler
!= NULL
) return(handler
);
1639 handler
= xmlFindCharEncodingHandler("UCS2");
1640 if (handler
!= NULL
) return(handler
);
1644 * We used to keep ISO Latin encodings native in the
1645 * generated data. This led to so many problems that
1646 * this has been removed. One can still change this
1647 * back by registering no-ops encoders for those
/* ISO-8859-1 .. ISO-8859-9: one canonical name each. */
1649 case XML_CHAR_ENCODING_8859_1
:
1650 handler
= xmlFindCharEncodingHandler("ISO-8859-1");
1651 if (handler
!= NULL
) return(handler
);
1653 case XML_CHAR_ENCODING_8859_2
:
1654 handler
= xmlFindCharEncodingHandler("ISO-8859-2");
1655 if (handler
!= NULL
) return(handler
);
1657 case XML_CHAR_ENCODING_8859_3
:
1658 handler
= xmlFindCharEncodingHandler("ISO-8859-3");
1659 if (handler
!= NULL
) return(handler
);
1661 case XML_CHAR_ENCODING_8859_4
:
1662 handler
= xmlFindCharEncodingHandler("ISO-8859-4");
1663 if (handler
!= NULL
) return(handler
);
1665 case XML_CHAR_ENCODING_8859_5
:
1666 handler
= xmlFindCharEncodingHandler("ISO-8859-5");
1667 if (handler
!= NULL
) return(handler
);
1669 case XML_CHAR_ENCODING_8859_6
:
1670 handler
= xmlFindCharEncodingHandler("ISO-8859-6");
1671 if (handler
!= NULL
) return(handler
);
1673 case XML_CHAR_ENCODING_8859_7
:
1674 handler
= xmlFindCharEncodingHandler("ISO-8859-7");
1675 if (handler
!= NULL
) return(handler
);
1677 case XML_CHAR_ENCODING_8859_8
:
1678 handler
= xmlFindCharEncodingHandler("ISO-8859-8");
1679 if (handler
!= NULL
) return(handler
);
1681 case XML_CHAR_ENCODING_8859_9
:
1682 handler
= xmlFindCharEncodingHandler("ISO-8859-9");
1683 if (handler
!= NULL
) return(handler
);
/* Japanese encodings; Shift-JIS is tried under three spellings. */
1687 case XML_CHAR_ENCODING_2022_JP
:
1688 handler
= xmlFindCharEncodingHandler("ISO-2022-JP");
1689 if (handler
!= NULL
) return(handler
);
1691 case XML_CHAR_ENCODING_SHIFT_JIS
:
1692 handler
= xmlFindCharEncodingHandler("SHIFT-JIS");
1693 if (handler
!= NULL
) return(handler
);
1694 handler
= xmlFindCharEncodingHandler("SHIFT_JIS");
1695 if (handler
!= NULL
) return(handler
);
1696 handler
= xmlFindCharEncodingHandler("Shift_JIS");
1697 if (handler
!= NULL
) return(handler
);
1699 case XML_CHAR_ENCODING_EUC_JP
:
1700 handler
= xmlFindCharEncodingHandler("EUC-JP");
1701 if (handler
!= NULL
) return(handler
);
/* Debug trace for the "nothing found" outcome. */
1707 #ifdef DEBUG_ENCODING
1708 xmlGenericError(xmlGenericErrorContext
,
1709 "No handler found for encoding %d\n", enc
);
1715 * xmlFindCharEncodingHandler:
1716 * @name: a string describing the char encoding.
1718 * Search in the registered set the handler able to read/write that encoding
1719 * or create a new one.
1721 * Returns the handler or NULL if not found
/*
 * Looks up (or creates) the encoding handler for @name.
 * Search order visible here: alias resolution, the static defaultHandlers
 * table and the registered handlers (both compared against an upper-cased
 * copy of the name), then a freshly allocated iconv-backed handler, then a
 * freshly allocated ICU-backed handler, and finally a retry with the
 * canonical name of the parsed encoding.
 * Returns NULL at once for a NULL or empty @name.
 */
1723 xmlCharEncodingHandlerPtr
1724 xmlFindCharEncodingHandler(const char *name
) {
1727 xmlCharEncoding alias
;
1728 #ifdef LIBXML_ICONV_ENABLED
1729 xmlCharEncodingHandlerPtr enc
;
1730 iconv_t icv_in
, icv_out
;
1731 #endif /* LIBXML_ICONV_ENABLED */
1732 #ifdef LIBXML_ICU_ENABLED
1733 xmlCharEncodingHandlerPtr encu
;
1734 uconv_t
*ucv_in
, *ucv_out
;
1735 #endif /* LIBXML_ICU_ENABLED */
1739 if (name
== NULL
) return(NULL
);
1740 if (name
[0] == 0) return(NULL
);
1743 * Do the alias resolution
1746 nalias
= xmlGetEncodingAlias(name
);
1751 * Check first for directly registered encoding names
/* Upper-case at most 99 characters of the name for the comparisons below. */
1753 for (i
= 0;i
< 99;i
++) {
1754 upper
[i
] = (char) toupper((unsigned char) name
[i
]);
1755 if (upper
[i
] == 0) break;
/* Static handlers are returned with their const qualifier cast away. */
1759 for (i
= 0; i
< (int) NUM_DEFAULT_HANDLERS
; i
++) {
1760 if (strcmp(upper
, defaultHandlers
[i
].name
) == 0)
1761 return((xmlCharEncodingHandlerPtr
) &defaultHandlers
[i
]);
1764 if (handlers
!= NULL
) {
1765 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
1766 if (!strcmp(upper
, handlers
[i
]->name
)) {
1767 #ifdef DEBUG_ENCODING
1768 xmlGenericError(xmlGenericErrorContext
,
1769 "Found registered handler for encoding %s\n", name
);
1771 return(handlers
[i
]);
1776 #ifdef LIBXML_ICONV_ENABLED
1777 /* check whether iconv can handle this */
1778 icv_in
= iconv_open("UTF-8", name
);
1779 icv_out
= iconv_open(name
, "UTF-8");
/* Each direction is retried with the upper-cased name if the first open failed. */
1780 if (icv_in
== (iconv_t
) -1) {
1781 icv_in
= iconv_open("UTF-8", upper
);
1783 if (icv_out
== (iconv_t
) -1) {
1784 icv_out
= iconv_open(upper
, "UTF-8");
/* Both directions available: build a new handler owning both descriptors. */
1786 if ((icv_in
!= (iconv_t
) -1) && (icv_out
!= (iconv_t
) -1)) {
1787 enc
= (xmlCharEncodingHandlerPtr
)
1788 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1790 iconv_close(icv_in
);
1791 iconv_close(icv_out
);
1794 memset(enc
, 0, sizeof(xmlCharEncodingHandler
));
1795 enc
->name
= xmlMemStrdup(name
);
1796 if (enc
->name
== NULL
) {
1798 iconv_close(icv_in
);
1799 iconv_close(icv_out
);
1804 enc
->iconv_in
= icv_in
;
1805 enc
->iconv_out
= icv_out
;
1806 #ifdef DEBUG_ENCODING
1807 xmlGenericError(xmlGenericErrorContext
,
1808 "Found iconv handler for encoding %s\n", name
);
/* Only one direction could be opened: report it and close what was opened. */
1811 } else if ((icv_in
!= (iconv_t
) -1) || icv_out
!= (iconv_t
) -1) {
1812 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1813 "iconv : problems with filters for '%s'\n", name
);
1814 if (icv_in
!= (iconv_t
) -1)
1815 iconv_close(icv_in
);
1817 iconv_close(icv_out
);
1819 #endif /* LIBXML_ICONV_ENABLED */
1820 #ifdef LIBXML_ICU_ENABLED
1821 /* check whether icu can handle this */
1822 ucv_in
= openIcuConverter(name
, 1);
1823 ucv_out
= openIcuConverter(name
, 0);
1824 if (ucv_in
!= NULL
&& ucv_out
!= NULL
) {
1825 encu
= (xmlCharEncodingHandlerPtr
)
1826 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1828 closeIcuConverter(ucv_in
);
1829 closeIcuConverter(ucv_out
);
1832 memset(encu
, 0, sizeof(xmlCharEncodingHandler
));
1833 encu
->name
= xmlMemStrdup(name
);
1834 if (encu
->name
== NULL
) {
1836 closeIcuConverter(ucv_in
);
1837 closeIcuConverter(ucv_out
);
1841 encu
->output
= NULL
;
1842 encu
->uconv_in
= ucv_in
;
1843 encu
->uconv_out
= ucv_out
;
1844 #ifdef DEBUG_ENCODING
1845 xmlGenericError(xmlGenericErrorContext
,
1846 "Found ICU converter handler for encoding %s\n", name
);
/* Only one ICU converter could be opened: close both pointers and report. */
1849 } else if (ucv_in
!= NULL
|| ucv_out
!= NULL
) {
1850 closeIcuConverter(ucv_in
);
1851 closeIcuConverter(ucv_out
);
1852 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1853 "ICU converter : problems with filters for '%s'\n", name
);
1855 #endif /* LIBXML_ICU_ENABLED */
1857 #ifdef DEBUG_ENCODING
1858 xmlGenericError(xmlGenericErrorContext
,
1859 "No handler found for encoding %s\n", name
);
1863 * Fallback using the canonical names
/* NOTE(review): norig is presumably the caller's name before alias resolution -- confirm. */
1865 alias
= xmlParseCharEncoding(norig
);
1866 if (alias
!= XML_CHAR_ENCODING_ERROR
) {
1868 canon
= xmlGetCharEncodingName(alias
);
/* Recurse only when the canonical name differs from the current name. */
1869 if ((canon
!= NULL
) && (strcmp(name
, canon
))) {
1870 return(xmlFindCharEncodingHandler(canon
));
1874 /* If "none of the above", give up */
1878 /************************************************************************
1880 * ICONV based generic conversion functions *
1882 ************************************************************************/
1884 #ifdef LIBXML_ICONV_ENABLED
1887 * @cd: iconv converter data structure
1888 * @out: a pointer to an array of bytes to store the result
1889 * @outlen: the length of @out
1890 * @in: a pointer to an array of input bytes
1891 * @inlen: the length of @in
1893 * Returns 0 if success, or
1894 * -1 by lack of space, or
1895 * -2 if the transcoding fails (for *in is not valid utf8 string or
1896 * the result of transformation can't fit into the encoding we want), or
1897 * -3 if the last byte can't form a single output char.
1899 * The value of @inlen after return is the number of octets consumed
1900 * if the return value is 0, else unpredictable.
1901 * The value of @outlen after return is the number of octets produced.
/*
 * Runs one iconv() conversion from @in to @out using descriptor @cd.
 * On entry *inlen and *outlen hold the available sizes; after the call they
 * are reduced by the amounts iconv left unconsumed/unused, i.e. they hold
 * the bytes consumed and produced.
 * A NULL out, outlen, inlen or in is rejected, with *outlen set to 0 when
 * outlen itself is usable.
 */
1904 xmlIconvWrapper(iconv_t cd
, unsigned char *out
, int *outlen
,
1905 const unsigned char *in
, int *inlen
) {
1906 size_t icv_inlen
, icv_outlen
;
1907 const char *icv_in
= (const char *) in
;
1908 char *icv_out
= (char *) out
;
1911 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) || (in
== NULL
)) {
1912 if (outlen
!= NULL
) *outlen
= 0;
1916 icv_outlen
= *outlen
;
1918 * Some versions take const, other versions take non-const input.
1920 ret
= iconv(cd
, (void *) &icv_in
, &icv_inlen
, &icv_out
, &icv_outlen
);
/* iconv updates the remaining counts; convert them to consumed/produced. */
1921 *inlen
-= icv_inlen
;
1922 *outlen
-= icv_outlen
;
/* Leftover input or an iconv failure: errno is inspected to classify it. */
1923 if ((icv_inlen
!= 0) || (ret
== (size_t) -1)) {
1925 if (errno
== EILSEQ
) {
1930 if (errno
== E2BIG
) {
1935 if (errno
== EINVAL
) {
1945 #endif /* LIBXML_ICONV_ENABLED */
1947 /************************************************************************
1949 * ICU based generic conversion functions *
1951 ************************************************************************/
1953 #ifdef LIBXML_ICU_ENABLED
1956 * @cd: ICU uconverter data structure
1957 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1958 * @out: a pointer to an array of bytes to store the result
1959 * @outlen: the length of @out
1960 * @in: a pointer to an array of input bytes
1961 * @inlen: the length of @in
1962 * @flush: if true, indicates end of input
1964 * Returns 0 if success, or
1965 * -1 by lack of space, or
1966 * -2 if the transcoding fails (for *in is not valid utf8 string or
1967 * the result of transformation can't fit into the encoding we want), or
1968 * -3 if the last byte can't form a single output char.
1970 * The value of @inlen after return is the number of octets consumed
1971 * if the return value is 0, else unpredictable.
1972 * The value of @outlen after return is the number of octets produced.
/*
 * Runs one ICU conversion between an external encoding and UTF-8, using
 * the converter pair and UTF-16 pivot buffer stored in @cd.
 * After the call *inlen and *outlen are set to the bytes consumed and
 * produced, computed from how far ICU advanced the two cursors.
 * A NULL out, outlen, inlen or in is rejected, with *outlen set to 0 when
 * outlen itself is usable.
 */
1975 xmlUconvWrapper(uconv_t
*cd
, int toUnicode
, unsigned char *out
, int *outlen
,
1976 const unsigned char *in
, int *inlen
, int flush
) {
1977 const char *ucv_in
= (const char *) in
;
1978 char *ucv_out
= (char *) out
;
1979 UErrorCode err
= U_ZERO_ERROR
;
1981 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) || (in
== NULL
)) {
1982 if (outlen
!= NULL
) *outlen
= 0;
/* NOTE(review): the two calls below are presumably selected by toUnicode -- confirm. */
1987 /* encoding => UTF-16 => UTF-8 */
1988 ucnv_convertEx(cd
->utf8
, cd
->uconv
, &ucv_out
, ucv_out
+ *outlen
,
1989 &ucv_in
, ucv_in
+ *inlen
, cd
->pivot_buf
,
1990 &cd
->pivot_source
, &cd
->pivot_target
,
1991 cd
->pivot_buf
+ ICU_PIVOT_BUF_SIZE
, 0, flush
, &err
);
1993 /* UTF-8 => UTF-16 => encoding */
1994 ucnv_convertEx(cd
->uconv
, cd
->utf8
, &ucv_out
, ucv_out
+ *outlen
,
1995 &ucv_in
, ucv_in
+ *inlen
, cd
->pivot_buf
,
1996 &cd
->pivot_source
, &cd
->pivot_target
,
1997 cd
->pivot_buf
+ ICU_PIVOT_BUF_SIZE
, 0, flush
, &err
);
/* Report progress as the distance each cursor moved from its start. */
1999 *inlen
= ucv_in
- (const char*) in
;
2000 *outlen
= ucv_out
- (char *) out
;
2001 if (U_SUCCESS(err
)) {
2002 /* reset pivot buf if this is the last call for input (flush==TRUE) */
2004 cd
->pivot_source
= cd
->pivot_target
= cd
->pivot_buf
;
/* ICU error codes are classified separately: output overflow vs. bad input. */
2007 if (err
== U_BUFFER_OVERFLOW_ERROR
)
2009 if (err
== U_INVALID_CHAR_FOUND
|| err
== U_ILLEGAL_CHAR_FOUND
)
2013 #endif /* LIBXML_ICU_ENABLED */
2015 /************************************************************************
2017 * The real API used by libxml for on-the-fly conversion *
2019 ************************************************************************/
2023 * @handler: encoding handler
2024 * @out: a pointer to an array of bytes to store the result
2025 * @outlen: the length of @out
2026 * @in: a pointer to an array of input bytes
2027 * @inlen: the length of @in
2028 * @flush: flush (ICU-related)
2030 * Returns 0 if success, or
2031 * -1 by lack of space, or
2032 * -2 if the transcoding fails (for *in is not valid utf8 string or
2033 * the result of transformation can't fit into the encoding we want), or
2034 * -3 if the last byte can't form a single output char.
2036 * The value of @inlen after return is the number of octets consumed
2037 * as the return value is 0, else unpredictable.
2038 * The value of @outlen after return is the number of octets produced.
/*
 * Converts one chunk of input to UTF-8 through @handler.
 * Dispatch order: the handler's own input callback if set, otherwise its
 * iconv input descriptor (when built with iconv), otherwise its ICU input
 * converter (when built with ICU).
 */
2041 xmlEncInputChunk(xmlCharEncodingHandler
*handler
, unsigned char *out
,
2042 int *outlen
, const unsigned char *in
, int *inlen
, int flush
) {
2046 if (handler
->input
!= NULL
) {
2047 ret
= handler
->input(out
, outlen
, in
, inlen
);
2051 #ifdef LIBXML_ICONV_ENABLED
2052 else if (handler
->iconv_in
!= NULL
) {
2053 ret
= xmlIconvWrapper(handler
->iconv_in
, out
, outlen
, in
, inlen
);
2055 #endif /* LIBXML_ICONV_ENABLED */
2056 #ifdef LIBXML_ICU_ENABLED
/* The literal 1 selects the "to Unicode" direction of xmlUconvWrapper. */
2057 else if (handler
->uconv_in
!= NULL
) {
2058 ret
= xmlUconvWrapper(handler
->uconv_in
, 1, out
, outlen
, in
, inlen
,
2061 #endif /* LIBXML_ICU_ENABLED */
2072 * xmlEncOutputChunk:
2073 * @handler: encoding handler
2074 * @out: a pointer to an array of bytes to store the result
2075 * @outlen: the length of @out
2076 * @in: a pointer to an array of input bytes
2077 * @inlen: the length of @in
2079 * Returns 0 if success, or
2080 * -1 by lack of space, or
2081 * -2 if the transcoding fails (for *in is not valid utf8 string or
2082 * the result of transformation can't fit into the encoding we want), or
2083 * -3 if the last byte can't form a single output char.
2084 * -4 if no output function was found.
2086 * The value of @inlen after return is the number of octets consumed
2087 * as the return value is 0, else unpredictable.
2088 * The value of @outlen after return is the number of octets produced.
/*
 * Converts one chunk of UTF-8 to the target encoding through @handler.
 * Dispatch order: the handler's own output callback if set, otherwise its
 * iconv output descriptor (when built with iconv), otherwise its ICU output
 * converter (when built with ICU).
 */
2091 xmlEncOutputChunk(xmlCharEncodingHandler
*handler
, unsigned char *out
,
2092 int *outlen
, const unsigned char *in
, int *inlen
) {
2095 if (handler
->output
!= NULL
) {
2096 ret
= handler
->output(out
, outlen
, in
, inlen
);
2100 #ifdef LIBXML_ICONV_ENABLED
2101 else if (handler
->iconv_out
!= NULL
) {
2102 ret
= xmlIconvWrapper(handler
->iconv_out
, out
, outlen
, in
, inlen
);
2104 #endif /* LIBXML_ICONV_ENABLED */
2105 #ifdef LIBXML_ICU_ENABLED
/* The literal 0 selects the "from Unicode" direction of xmlUconvWrapper. */
2106 else if (handler
->uconv_out
!= NULL
) {
2107 ret
= xmlUconvWrapper(handler
->uconv_out
, 0, out
, outlen
, in
, inlen
,
2110 #endif /* LIBXML_ICU_ENABLED */
2121 * xmlCharEncFirstLine:
2122 * @handler: char encoding transformation data structure
2123 * @out: an xmlBuffer for the output.
2124 * @in: an xmlBuffer for the input
2126 * DEPRECATED: Don't use.
/* Deprecated wrapper: forwards its arguments unchanged to xmlCharEncInFunc. */
2129 xmlCharEncFirstLine(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2131 return(xmlCharEncInFunc(handler
, out
, in
));
2136 * @input: a parser input buffer
2137 * @flush: try to flush all the raw buffer
2139 * Generic front-end for the encoding handler on parser input
2141 * Returns the number of bytes written if success, or
2143 * -2 if the transcoding fails (for *in is not valid utf8 string or
2144 * the result of transformation can't fit into the encoding we want), or
/*
 * Converts pending raw bytes of a parser input buffer into input->buffer
 * using input->encoder.
 * The input is rejected when input, its encoder, its buffer or its raw
 * buffer is NULL. Consumed bytes are removed from the source buffer and
 * produced bytes are appended to the destination buffer.
 * The return value is the number of bytes produced, or the converter's
 * status when nothing was produced.
 */
2147 xmlCharEncInput(xmlParserInputBufferPtr input
, int flush
)
2157 if ((input
== NULL
) || (input
->encoder
== NULL
) ||
2158 (input
->buffer
== NULL
) || (input
->raw
== NULL
))
2160 out
= input
->buffer
;
2163 toconv
= xmlBufUse(in
);
/* Without flush, large inputs are handled specially above 64 KiB. */
2166 if ((toconv
> 64 * 1024) && (flush
== 0))
/* Make sure the output has room for at least twice the input size. */
2168 written
= xmlBufAvail(out
);
2169 if (toconv
* 2 >= written
) {
2170 if (xmlBufGrow(out
, toconv
* 2) < 0)
2172 written
= xmlBufAvail(out
);
/* Without flush, the output window is capped at 128 KiB per call. */
2174 if ((written
> 128 * 1024) && (flush
== 0))
2175 written
= 128 * 1024;
2179 ret
= xmlEncInputChunk(input
->encoder
, xmlBufEnd(out
), &c_out
,
2180 xmlBufContent(in
), &c_in
, flush
);
/* Commit the progress reported by the converter to both buffers. */
2181 xmlBufShrink(in
, c_in
);
2182 xmlBufAddLen(out
, c_out
);
2188 #ifdef DEBUG_ENCODING
2189 xmlGenericError(xmlGenericErrorContext
,
2190 "converted %d bytes to %d bytes of input\n",
2195 #ifdef DEBUG_ENCODING
2196 xmlGenericError(xmlGenericErrorContext
,
2197 "converted %d bytes to %d bytes of input, %d left\n",
2198 c_in
, c_out
, (int)xmlBufUse(in
));
2202 #ifdef DEBUG_ENCODING
2203 xmlGenericError(xmlGenericErrorContext
,
2204 "converted %d bytes to %d bytes of input, %d left\n",
2205 c_in
, c_out
, (int)xmlBufUse(in
));
/*
 * Error report: the first four remaining bytes are formatted as hex.
 * NOTE(review): four bytes are read regardless of how many remain in the
 * buffer -- confirm that the buffer always has that much readable content.
 */
2210 const xmlChar
*content
= xmlBufContent(in
);
2212 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2213 content
[0], content
[1],
2214 content
[2], content
[3]);
2216 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2217 "input conversion failed due to input error, bytes %s\n",
2222 * Ignore when input buffer is not on a boundary
2226 return (c_out
? c_out
: ret
);
2231 * @handler: char encoding transformation data structure
2232 * @out: an xmlBuffer for the output.
2233 * @in: an xmlBuffer for the input
2235 * Generic front-end for the encoding handler input function
2237 * Returns the number of bytes written if success, or
2239 * -2 if the transcoding fails (for *in is not valid utf8 string or
2240 * the result of transformation can't fit into the encoding we want), or
/*
 * xmlBuffer-based input conversion: converts the content of @in to UTF-8
 * and appends it to @out using @handler, always in flush mode (the last
 * argument passed to xmlEncInputChunk is 1).
 * One byte of @out is always kept in reserve for the terminating 0.
 * The return value is the number of bytes written, or the converter's
 * status when nothing was written.
 */
2243 xmlCharEncInFunc(xmlCharEncodingHandler
* handler
, xmlBufferPtr out
,
2250 if (handler
== NULL
)
2260 written
= out
->size
- out
->use
-1; /* count '\0' */
/* NOTE(review): the result of xmlBufferGrow is not checked here -- confirm this is intended. */
2261 if (toconv
* 2 >= written
) {
2262 xmlBufferGrow(out
, out
->size
+ toconv
* 2);
2263 written
= out
->size
- out
->use
- 1;
2265 ret
= xmlEncInputChunk(handler
, &out
->content
[out
->use
], &written
,
2266 in
->content
, &toconv
, 1);
/* Drop the consumed input, account for the new output and re-terminate it. */
2267 xmlBufferShrink(in
, toconv
);
2268 out
->use
+= written
;
2269 out
->content
[out
->use
] = 0;
2275 #ifdef DEBUG_ENCODING
2276 xmlGenericError(xmlGenericErrorContext
,
2277 "converted %d bytes to %d bytes of input\n",
2282 #ifdef DEBUG_ENCODING
2283 xmlGenericError(xmlGenericErrorContext
,
2284 "converted %d bytes to %d bytes of input, %d left\n",
2285 toconv
, written
, in
->use
);
2289 #ifdef DEBUG_ENCODING
2290 xmlGenericError(xmlGenericErrorContext
,
2291 "converted %d bytes to %d bytes of input, %d left\n",
2292 toconv
, written
, in
->use
);
/*
 * Error report with the first four remaining input bytes in hex.
 * NOTE(review): four bytes are read regardless of in->use -- confirm.
 */
2298 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2299 in
->content
[0], in
->content
[1],
2300 in
->content
[2], in
->content
[3]);
2302 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2303 "input conversion failed due to input error, bytes %s\n",
2308 * Ignore when input buffer is not on a boundary
2312 return (written
? written
: ret
);
2315 #ifdef LIBXML_OUTPUT_ENABLED
2318 * @output: a parser output buffer
2319 * @init: is this an initialization call without data
2321 * Generic front-end for the encoding handler on parser output
2322 * a first call with @init == 1 has to be made first to initiate the
2323 * output in case of non-stateless encoding needing to initiate their
2324 * state or the output (like the BOM in UTF16).
2325 * In case of UTF8 sequence conversion errors for the given encoder,
2326 * the content will be automatically remapped to a CharRef sequence.
2328 * Returns the number of bytes written if success, or
2330 * -2 if the transcoding fails (for *in is not valid utf8 string or
2331 * the result of transformation can't fit into the encoding we want), or
/*
 * Converts the UTF-8 content of output->buffer with output->encoder and
 * accumulates the produced byte count in writtentot.
 * The call is rejected when output, its encoder, its buffer or its conv
 * buffer is NULL. With @init set, only the encoder initialization is done.
 * A character the encoder cannot convert is replaced by a character
 * reference that is itself run through the encoder.
 * The return value is the total bytes written, or the last converter
 * status when nothing was written.
 */
2334 xmlCharEncOutput(xmlOutputBufferPtr output
, int init
)
2345 if ((output
== NULL
) || (output
->encoder
== NULL
) ||
2346 (output
->buffer
== NULL
) || (output
->conv
== NULL
))
2349 in
= output
->buffer
;
2353 written
= xmlBufAvail(out
);
2356 * First specific handling of the initialization call
2361 /* TODO: Check return value. */
2362 xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2364 xmlBufAddLen(out
, c_out
);
2365 #ifdef DEBUG_ENCODING
2366 xmlGenericError(xmlGenericErrorContext
,
2367 "initialized encoder\n");
2373 * Conversion itself.
2375 toconv
= xmlBufUse(in
);
2377 return (writtentot
);
/* Inputs above 64 KiB are handled specially. */
2378 if (toconv
> 64 * 1024)
/*
 * Reserve up to four output bytes per input byte.
 * NOTE(review): the result of xmlBufGrow is not checked here -- confirm.
 */
2380 if (toconv
* 4 >= written
) {
2381 xmlBufGrow(out
, toconv
* 4);
2382 written
= xmlBufAvail(out
);
/* The output window is capped at 256 KiB per pass. */
2384 if (written
> 256 * 1024)
2385 written
= 256 * 1024;
2389 ret
= xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
2390 xmlBufContent(in
), &c_in
);
/* Commit converter progress to both buffers and to the running total. */
2391 xmlBufShrink(in
, c_in
);
2392 xmlBufAddLen(out
, c_out
);
2393 writtentot
+= c_out
;
2396 /* Can be a limitation of iconv or uconv */
2403 * Attempt to handle error cases
2407 #ifdef DEBUG_ENCODING
2408 xmlGenericError(xmlGenericErrorContext
,
2409 "converted %d bytes to %d bytes of output\n",
2414 #ifdef DEBUG_ENCODING
2415 xmlGenericError(xmlGenericErrorContext
,
2416 "output conversion failed by lack of space\n");
2420 #ifdef DEBUG_ENCODING
2421 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2422 c_in
, c_out
, (int) xmlBufUse(in
));
2426 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2427 "xmlCharEncOutFunc: no output function !\n", NULL
);
/* Unconvertible character: decode it from the head of the input buffer. */
2431 xmlChar charref
[20];
2432 int len
= xmlBufUse(in
);
2433 xmlChar
*content
= xmlBufContent(in
);
2434 int cur
, charrefLen
;
2436 cur
= xmlGetUTF8Char(content
, &len
);
2440 #ifdef DEBUG_ENCODING
2441 xmlGenericError(xmlGenericErrorContext
,
2442 "handling output conversion error\n");
2443 xmlGenericError(xmlGenericErrorContext
,
2444 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2445 content
[0], content
[1],
2446 content
[2], content
[3]);
2449 * Removes the UTF8 sequence, and replace it by a charref
2450 * and continue the transcoding phase, hoping the error
2451 * did not mangle the encoder state.
2453 charrefLen
= snprintf((char *) &charref
[0], sizeof(charref
),
/* Drop the offending sequence, then make room for the encoded charref. */
2455 xmlBufShrink(in
, len
);
2456 xmlBufGrow(out
, charrefLen
* 4);
2457 c_out
= xmlBufAvail(out
);
2459 ret
= xmlEncOutputChunk(output
->encoder
, xmlBufEnd(out
), &c_out
,
/* The charref must be converted completely, otherwise the failure is reported. */
2462 if ((ret
< 0) || (c_in
!= charrefLen
)) {
2465 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2466 content
[0], content
[1],
2467 content
[2], content
[3]);
2469 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2470 "output conversion failed due to conv error, bytes %s\n",
2476 xmlBufAddLen(out
, c_out
);
2477 writtentot
+= c_out
;
2481 return(writtentot
? writtentot
: ret
);
2486 * xmlCharEncOutFunc:
2487 * @handler: char encoding transformation data structure
2488 * @out: an xmlBuffer for the output.
2489 * @in: an xmlBuffer for the input
2491 * Generic front-end for the encoding handler output function
2492 * a first call with @in == NULL has to be made first to initiate the
2493 * output in case of non-stateless encoding needing to initiate their
2494 * state or the output (like the BOM in UTF16).
2495 * In case of UTF8 sequence conversion errors for the given encoder,
2496 * the content will be automatically remapped to a CharRef sequence.
2498 * Returns the number of bytes written if success, or
2500 * -2 if the transcoding fails (for *in is not valid utf8 string or
2501 * the result of transformation can't fit into the encoding we want), or
/*
 * xmlBuffer-based output conversion: converts the UTF-8 content of @in with
 * @handler and appends the result to @out, keeping one byte in reserve for
 * the terminating 0.
 * Returns -1 when @handler or @out is NULL. A character the encoder cannot
 * convert is replaced by a character reference that is itself run through
 * the encoder.
 * The return value is the total bytes written, or the last converter
 * status when nothing was written.
 */
2504 xmlCharEncOutFunc(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2511 if (handler
== NULL
) return(-1);
2512 if (out
== NULL
) return(-1);
2516 written
= out
->size
- out
->use
;
2519 written
--; /* Gennady: count '/0' */
2522 * First specific handling of in = NULL, i.e. the initialization call
2526 /* TODO: Check return value. */
2527 xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2529 out
->use
+= written
;
2530 out
->content
[out
->use
] = 0;
2531 #ifdef DEBUG_ENCODING
2532 xmlGenericError(xmlGenericErrorContext
,
2533 "initialized encoder\n");
2539 * Conversion itself.
/*
 * Reserve up to four output bytes per input byte.
 * NOTE(review): the result of xmlBufferGrow is not checked here -- confirm.
 */
2544 if (toconv
* 4 >= written
) {
2545 xmlBufferGrow(out
, toconv
* 4);
2546 written
= out
->size
- out
->use
- 1;
2548 ret
= xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
2549 in
->content
, &toconv
);
/* Commit converter progress, update the running total and re-terminate. */
2550 xmlBufferShrink(in
, toconv
);
2551 out
->use
+= written
;
2552 writtentot
+= written
;
2553 out
->content
[out
->use
] = 0;
2556 /* Can be a limitation of iconv or uconv */
2563 * Attempt to handle error cases
2567 #ifdef DEBUG_ENCODING
2568 xmlGenericError(xmlGenericErrorContext
,
2569 "converted %d bytes to %d bytes of output\n",
2574 #ifdef DEBUG_ENCODING
2575 xmlGenericError(xmlGenericErrorContext
,
2576 "output conversion failed by lack of space\n");
2580 #ifdef DEBUG_ENCODING
2581 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2582 toconv
, written
, in
->use
);
2586 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2587 "xmlCharEncOutFunc: no output function !\n", NULL
);
/* Unconvertible character: decode it from the head of the input buffer. */
2591 xmlChar charref
[20];
2593 const xmlChar
*utf
= (const xmlChar
*) in
->content
;
2594 int cur
, charrefLen
;
2596 cur
= xmlGetUTF8Char(utf
, &len
);
2600 #ifdef DEBUG_ENCODING
2601 xmlGenericError(xmlGenericErrorContext
,
2602 "handling output conversion error\n");
2603 xmlGenericError(xmlGenericErrorContext
,
2604 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2605 in
->content
[0], in
->content
[1],
2606 in
->content
[2], in
->content
[3]);
2609 * Removes the UTF8 sequence, and replace it by a charref
2610 * and continue the transcoding phase, hoping the error
2611 * did not mangle the encoder state.
2613 charrefLen
= snprintf((char *) &charref
[0], sizeof(charref
),
/* Drop the offending sequence, then make room for the encoded charref. */
2615 xmlBufferShrink(in
, len
);
2616 xmlBufferGrow(out
, charrefLen
* 4);
2617 written
= out
->size
- out
->use
- 1;
2618 toconv
= charrefLen
;
2619 ret
= xmlEncOutputChunk(handler
, &out
->content
[out
->use
], &written
,
/* The charref must be converted completely, otherwise the failure is reported. */
2622 if ((ret
< 0) || (toconv
!= charrefLen
)) {
2625 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2626 in
->content
[0], in
->content
[1],
2627 in
->content
[2], in
->content
[3]);
2629 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2630 "output conversion failed due to conv error, bytes %s\n",
/* On this failure path the first remaining input byte is overwritten with a space. */
2632 in
->content
[0] = ' ';
2636 out
->use
+= written
;
2637 writtentot
+= written
;
2638 out
->content
[out
->use
] = 0;
2642 return(writtentot
? writtentot
: ret
);
2646 * xmlCharEncCloseFunc:
2647 * @handler: char encoding transformation data structure
2649 * Generic front-end for encoding handler close function
2651 * Returns 0 if success, or -1 in case of error
2654 xmlCharEncCloseFunc(xmlCharEncodingHandler
*handler
) {
2659 if (handler
== NULL
) return(-1);
2661 for (i
= 0; i
< (int) NUM_DEFAULT_HANDLERS
; i
++) {
2662 if (handler
== &defaultHandlers
[i
])
2666 if (handlers
!= NULL
) {
2667 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
2668 if (handler
== handlers
[i
])
2672 #ifdef LIBXML_ICONV_ENABLED
2674 * Iconv handlers can be used only once, free the whole block.
2675 * and the associated icon resources.
2677 if ((handler
->iconv_out
!= NULL
) || (handler
->iconv_in
!= NULL
)) {
2679 if (handler
->iconv_out
!= NULL
) {
2680 if (iconv_close(handler
->iconv_out
))
2682 handler
->iconv_out
= NULL
;
2684 if (handler
->iconv_in
!= NULL
) {
2685 if (iconv_close(handler
->iconv_in
))
2687 handler
->iconv_in
= NULL
;
2690 #endif /* LIBXML_ICONV_ENABLED */
2691 #ifdef LIBXML_ICU_ENABLED
2692 if ((handler
->uconv_out
!= NULL
) || (handler
->uconv_in
!= NULL
)) {
2694 if (handler
->uconv_out
!= NULL
) {
2695 closeIcuConverter(handler
->uconv_out
);
2696 handler
->uconv_out
= NULL
;
2698 if (handler
->uconv_in
!= NULL
) {
2699 closeIcuConverter(handler
->uconv_in
);
2700 handler
->uconv_in
= NULL
;
2705 /* free up only dynamic handlers iconv/uconv */
2706 if (handler
->name
!= NULL
)
2707 xmlFree(handler
->name
);
2708 handler
->name
= NULL
;
2711 #ifdef DEBUG_ENCODING
2713 xmlGenericError(xmlGenericErrorContext
,
2714 "failed to close the encoding handler\n");
2716 xmlGenericError(xmlGenericErrorContext
,
2717 "closed the encoding handler\n");
2725 * @ctxt: an XML parser context
2727 * This function provides the current index of the parser relative
2728 * to the start of the current entity. This function is computed in
2729 * bytes from the beginning starting at zero and finishing at the
2730 * size in byte of the file if parsing a file. The function is
2731 * of constant cost if the input is UTF-8 but can be costly if run
2732 * on non-UTF-8 input.
2734 * Returns the index in bytes from the beginning of the entity or -1
2735 * in case the index could not be computed.
2738 xmlByteConsumed(xmlParserCtxtPtr ctxt
) {
2739 xmlParserInputPtr in
;
2741 if (ctxt
== NULL
) return(-1);
2743 if (in
== NULL
) return(-1);
2744 if ((in
->buf
!= NULL
) && (in
->buf
->encoder
!= NULL
)) {
2745 unsigned int unused
= 0;
2746 xmlCharEncodingHandler
* handler
= in
->buf
->encoder
;
2748 * Encoding conversion, compute the number of unused original
2749 * bytes from the input not consumed and subtract that from
2750 * the raw consumed value, this is not a cheap operation
2752 if (in
->end
- in
->cur
> 0) {
2753 unsigned char convbuf
[32000];
2754 const unsigned char *cur
= (const unsigned char *)in
->cur
;
2755 int toconv
= in
->end
- in
->cur
, written
= 32000;
2760 toconv
= in
->end
- cur
;
2762 ret
= xmlEncOutputChunk(handler
, &convbuf
[0], &written
,
2772 } while (ret
== -2);
2774 if (in
->buf
->rawconsumed
< unused
)
2776 return(in
->buf
->rawconsumed
- unused
);
2778 return(in
->consumed
+ (in
->cur
- in
->base
));
2781 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2782 #ifdef LIBXML_ISO8859X_ENABLED
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * Every input sequence yields exactly one output byte.  Conversion stops
 * early, without error, once @outlen bytes have been stored, so the
 * caller's buffer is never overrun.
 *
 * Returns the number of bytes written on success (possibly 0),
 *         -1 if a required argument is NULL,
 *         -2 if the input is not valid UTF-8 or a character has no
 *            mapping in @xlattable (this includes anything >= U+10000),
 *         -3 if the input ends in the middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed,
 * counting only completely converted sequences.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    /* Negative lengths are treated as empty buffers. */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + ((*inlen > 0) ? *inlen : 0);

    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            /* ASCII maps to itself */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (in >= inend) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            /* first level: lead byte -> block, second level: block -> byte */
            d = xlattable[48 + (c & 0x3F) + xlattable[d & 0x1F] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            d = xlattable[48 + (c2 & 0x3F) +
                          xlattable[48 + (c1 & 0x3F) +
                                    xlattable[32 + (d & 0x0F)] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }

        *out++ = d;
        /* only now is the whole sequence accounted as consumed */
        processed = in;
    }

    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128-entry table giving the code point of bytes 0x80-0xFF
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.
 *
 * ASCII bytes are copied as long as one output byte is free.  A byte
 * >= 0x80 is only converted while at least three output bytes are free,
 * whatever the length of its UTF-8 form; conversion stops there
 * otherwise, without error.
 *
 * Returns the number of bytes written on success (possibly 0), or -1 if
 *         an argument is NULL or a byte has no entry in @unicodetable.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);

    /* Negative lengths are treated as empty buffers. */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + ((*inlen > 0) ? *inlen : 0);

    /* The bounds test always precedes the read of *in. */
    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            /* room for the longest (three byte) form is required */
            if (outend - out < 3)
                break;
            c = unicodetable[c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return(-1);
            }
            if (c < 0x800) {
                *out++ = ((c >> 6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >> 6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        in++;
    }

    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);
}
2970 /************************************************************************
2971 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2972 ************************************************************************/
2974 static unsigned short const xmlunicodetable_ISO8859_2
[128] = {
2975 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2976 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2977 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2978 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2979 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2980 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2981 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2982 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2983 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2984 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2985 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2986 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2987 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2988 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2989 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2990 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2993 static const unsigned char xmltranscodetable_ISO8859_2
[48 + 6 * 64] = {
2994 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3002 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3003 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3004 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3005 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3006 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3009 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3011 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3014 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3015 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3016 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3017 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3018 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3019 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3020 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3023 static unsigned short const xmlunicodetable_ISO8859_3
[128] = {
3024 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3025 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3026 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3027 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3028 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3029 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3030 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3031 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3032 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3033 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3034 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3035 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3036 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3037 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3038 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3039 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3042 static const unsigned char xmltranscodetable_ISO8859_3
[48 + 7 * 64] = {
3043 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3044 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3051 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3052 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3053 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3054 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3055 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3056 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3057 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3068 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3070 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3071 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3072 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3073 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3076 static unsigned short const xmlunicodetable_ISO8859_4
[128] = {
3077 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3078 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3079 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3080 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3081 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3082 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3083 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3084 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3085 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3086 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3087 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3088 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3089 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3090 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3091 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3092 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3095 static const unsigned char xmltranscodetable_ISO8859_4
[48 + 6 * 64] = {
3096 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3097 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3100 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3102 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3104 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3105 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3106 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3107 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3108 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3109 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3110 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3112 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3113 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3114 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3115 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3116 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3117 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3120 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3121 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3122 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3125 static unsigned short const xmlunicodetable_ISO8859_5
[128] = {
3126 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3127 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3128 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3129 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3130 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3131 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3132 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3133 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3134 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3135 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3136 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3137 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3138 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3139 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3140 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3141 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3144 static const unsigned char xmltranscodetable_ISO8859_5
[48 + 6 * 64] = {
3145 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3149 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3153 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3154 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3157 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3158 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3159 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3160 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3161 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174 static unsigned short const xmlunicodetable_ISO8859_6
[128] = {
3175 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3176 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3177 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3178 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3179 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3180 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3181 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3182 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3183 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3184 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3185 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3186 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3187 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3188 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3189 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3190 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3193 static const unsigned char xmltranscodetable_ISO8859_6
[48 + 5 * 64] = {
3194 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3199 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3200 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3202 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3203 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3211 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3212 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3213 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3214 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3219 static unsigned short const xmlunicodetable_ISO8859_7
[128] = {
3220 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3221 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3222 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3223 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3224 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3225 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3226 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3227 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3228 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3229 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3230 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3231 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3232 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3233 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3234 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3235 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3238 static const unsigned char xmltranscodetable_ISO8859_7
[48 + 7 * 64] = {
3239 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3247 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3248 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3249 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3250 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3252 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3256 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3263 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3264 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3265 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3266 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3267 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272 static unsigned short const xmlunicodetable_ISO8859_8
[128] = {
3273 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3274 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3275 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3276 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3277 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3278 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3279 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3280 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3281 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3282 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3283 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3285 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3286 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3287 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3288 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3291 static const unsigned char xmltranscodetable_ISO8859_8
[48 + 7 * 64] = {
3292 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3300 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3301 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3302 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3303 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3311 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3316 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3321 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128-entry table of 16-bit values that ISO8859_9ToUTF8() passes to
 * ISO8859xToUTF8().  Entry i holds 0x0080 + i, except at indices
 * 0x50, 0x5d, 0x5e, 0x70, 0x7d and 0x7e, which hold 0x011e, 0x0130,
 * 0x015e, 0x011f, 0x0131 and 0x015f respectively.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3325 static unsigned short const xmlunicodetable_ISO8859_9
[128] = {
3326 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3327 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3328 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3329 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3330 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3331 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3332 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3333 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3334 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3335 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3336 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3337 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3338 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3339 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3340 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3341 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table of declared size 48 + 5 * 64, written as 23 adjacent
 * 16-byte string literals.  UTF8ToISO8859_9() passes it to
 * UTF8ToISO8859x().
 * NOTE(review): the 48 + N * 64 size suggests a 48-byte index followed
 * by N blocks of 64 output bytes, with 0x00 meaning "no mapping" --
 * confirm against UTF8ToISO8859x().
 */
3344 static const unsigned char xmltranscodetable_ISO8859_9
[48 + 5 * 64] = {
3345 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3353 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3354 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3355 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3356 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3357 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3358 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3359 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128-entry table of 16-bit values that ISO8859_10ToUTF8() passes to
 * ISO8859xToUTF8().  Entries 0..32 hold 0x0080..0x00a0; the remaining
 * entries mix values below 0x0100 with values in the 0x01xx range and
 * one 0x2015.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3370 static unsigned short const xmlunicodetable_ISO8859_10
[128] = {
3371 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3372 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3373 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3374 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3375 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3376 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3377 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3378 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3379 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3380 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3381 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3382 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3383 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3384 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3385 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3386 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table of declared size 48 + 7 * 64, written as 31 adjacent
 * 16-byte string literals.  UTF8ToISO8859_10() passes it to
 * UTF8ToISO8859x().
 * NOTE(review): the 48 + N * 64 size suggests a 48-byte index followed
 * by N blocks of 64 bytes, with 0x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
3389 static const unsigned char xmltranscodetable_ISO8859_10
[48 + 7 * 64] = {
3390 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3394 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3398 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3399 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3400 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3402 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3404 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3405 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3408 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3409 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3418 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3419 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3420 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128-entry table of 16-bit values that ISO8859_11ToUTF8() passes to
 * ISO8859xToUTF8().  Entries 0..32 hold 0x0080..0x00a0; the rest are
 * in the 0x0e01..0x0e5b range, with eight entries set to 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i and 0x0000 marks an unassigned byte -- confirm against
 * ISO8859xToUTF8().
 */
3423 static unsigned short const xmlunicodetable_ISO8859_11
[128] = {
3424 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3425 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3426 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3427 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3428 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3429 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3430 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3431 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3432 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3433 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3434 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3435 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3436 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3437 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3438 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3439 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table of declared size 48 + 6 * 64, written as 27 adjacent
 * 16-byte string literals.  UTF8ToISO8859_11() passes it to
 * UTF8ToISO8859x().
 * NOTE(review): the 48 + N * 64 size suggests a 48-byte index followed
 * by N blocks of 64 bytes, with 0x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
3442 static const unsigned char xmltranscodetable_ISO8859_11
[48 + 6 * 64] = {
3443 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3451 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3452 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3458 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3459 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3460 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3461 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3462 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3467 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3468 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128-entry table of 16-bit values that ISO8859_13ToUTF8() passes to
 * ISO8859xToUTF8().  Entries 0..32 hold 0x0080..0x00a0; the remaining
 * entries mix values below 0x0100, values in the 0x01xx range and
 * four 0x20xx values (0x201d, 0x201e, 0x201c, 0x2019).
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3472 static unsigned short const xmlunicodetable_ISO8859_13
[128] = {
3473 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3474 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3475 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3476 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3477 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3478 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3479 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3480 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3481 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3482 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3483 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3484 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3485 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3486 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3487 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3488 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table of declared size 48 + 7 * 64, written as 31 adjacent
 * 16-byte string literals.  UTF8ToISO8859_13() passes it to
 * UTF8ToISO8859x().
 * NOTE(review): the 48 + N * 64 size suggests a 48-byte index followed
 * by N blocks of 64 bytes, with 0x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
3491 static const unsigned char xmltranscodetable_ISO8859_13
[48 + 7 * 64] = {
3492 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3498 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3499 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3500 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3501 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3502 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3503 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3504 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3512 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3513 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3514 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3515 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3516 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3517 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3518 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3519 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3520 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3521 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3522 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values that ISO8859_14ToUTF8() passes to
 * ISO8859xToUTF8().  Entries 0..32 hold 0x0080..0x00a0; the remaining
 * entries mix values below 0x0100 with values in the 0x01xx and
 * 0x1exx ranges.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3525 static unsigned short const xmlunicodetable_ISO8859_14
[128] = {
3526 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3527 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3528 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3529 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3530 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3531 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3532 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3533 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3534 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3535 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3536 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3537 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3538 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3539 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3540 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3541 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table of declared size 48 + 10 * 64, written as 43 adjacent
 * 16-byte string literals.  UTF8ToISO8859_14() passes it to
 * UTF8ToISO8859x().
 * NOTE(review): the 48 + N * 64 size suggests a 48-byte index followed
 * by N blocks of 64 bytes, with 0x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
3544 static const unsigned char xmltranscodetable_ISO8859_14
[48 + 10 * 64] = {
3545 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3549 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3553 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3554 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3555 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3560 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3565 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3580 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3582 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3585 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3586 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3587 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values that ISO8859_15ToUTF8() passes to
 * ISO8859xToUTF8().  Entry i holds 0x0080 + i, except at indices
 * 0x24, 0x26, 0x28, 0x34, 0x38, 0x3c, 0x3d and 0x3e, which hold
 * 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153 and 0x0178.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3590 static unsigned short const xmlunicodetable_ISO8859_15
[128] = {
3591 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3592 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3593 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3594 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3595 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3596 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3597 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3598 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3599 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3600 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3601 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3602 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3603 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3604 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3605 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3606 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table of declared size 48 + 6 * 64, written as 27 adjacent
 * 16-byte string literals.  UTF8ToISO8859_15() passes it to
 * UTF8ToISO8859x().
 * NOTE(review): the 48 + N * 64 size suggests a 48-byte index followed
 * by N blocks of 64 bytes, with 0x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
3609 static const unsigned char xmltranscodetable_ISO8859_15
[48 + 6 * 64] = {
3610 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3618 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3619 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3620 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3621 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3633 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3634 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3635 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3636 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values that ISO8859_16ToUTF8() passes to
 * ISO8859xToUTF8().  Entries 0..32 hold 0x0080..0x00a0; the remaining
 * entries mix values below 0x0100 with values in the 0x01xx, 0x02xx
 * and 0x20xx ranges.
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i -- confirm against ISO8859xToUTF8().
 */
3639 static unsigned short const xmlunicodetable_ISO8859_16
[128] = {
3640 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3641 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3642 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3643 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3644 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3645 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3646 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3647 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3648 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3649 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3650 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3651 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3652 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3653 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3654 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3655 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table of declared size 48 + 9 * 64, written as 39 adjacent
 * 16-byte string literals.  UTF8ToISO8859_16() passes it to
 * UTF8ToISO8859x().
 * NOTE(review): the 48 + N * 64 size suggests a 48-byte index followed
 * by N blocks of 64 bytes, with 0x00 meaning "no mapping" -- confirm
 * against UTF8ToISO8859x().
 */
3658 static const unsigned char xmltranscodetable_ISO8859_16
[48 + 9 * 64] = {
3659 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3660 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3661 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3665 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3667 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3668 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3669 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3670 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3671 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3672 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3676 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3678 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3695 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3696 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3697 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 */
3705 static int ISO8859_2ToUTF8 (unsigned char* out
, int *outlen
,
3706 const unsigned char* in
, int *inlen
) {
3707 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_2
);
3709 static int UTF8ToISO8859_2 (unsigned char* out
, int *outlen
,
3710 const unsigned char* in
, int *inlen
) {
3711 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_2
);
3714 static int ISO8859_3ToUTF8 (unsigned char* out
, int *outlen
,
3715 const unsigned char* in
, int *inlen
) {
3716 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_3
);
3718 static int UTF8ToISO8859_3 (unsigned char* out
, int *outlen
,
3719 const unsigned char* in
, int *inlen
) {
3720 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_3
);
3723 static int ISO8859_4ToUTF8 (unsigned char* out
, int *outlen
,
3724 const unsigned char* in
, int *inlen
) {
3725 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_4
);
3727 static int UTF8ToISO8859_4 (unsigned char* out
, int *outlen
,
3728 const unsigned char* in
, int *inlen
) {
3729 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_4
);
3732 static int ISO8859_5ToUTF8 (unsigned char* out
, int *outlen
,
3733 const unsigned char* in
, int *inlen
) {
3734 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_5
);
3736 static int UTF8ToISO8859_5 (unsigned char* out
, int *outlen
,
3737 const unsigned char* in
, int *inlen
) {
3738 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_5
);
3741 static int ISO8859_6ToUTF8 (unsigned char* out
, int *outlen
,
3742 const unsigned char* in
, int *inlen
) {
3743 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_6
);
3745 static int UTF8ToISO8859_6 (unsigned char* out
, int *outlen
,
3746 const unsigned char* in
, int *inlen
) {
3747 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_6
);
3750 static int ISO8859_7ToUTF8 (unsigned char* out
, int *outlen
,
3751 const unsigned char* in
, int *inlen
) {
3752 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_7
);
3754 static int UTF8ToISO8859_7 (unsigned char* out
, int *outlen
,
3755 const unsigned char* in
, int *inlen
) {
3756 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_7
);
3759 static int ISO8859_8ToUTF8 (unsigned char* out
, int *outlen
,
3760 const unsigned char* in
, int *inlen
) {
3761 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_8
);
3763 static int UTF8ToISO8859_8 (unsigned char* out
, int *outlen
,
3764 const unsigned char* in
, int *inlen
) {
3765 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_8
);
3768 static int ISO8859_9ToUTF8 (unsigned char* out
, int *outlen
,
3769 const unsigned char* in
, int *inlen
) {
3770 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_9
);
3772 static int UTF8ToISO8859_9 (unsigned char* out
, int *outlen
,
3773 const unsigned char* in
, int *inlen
) {
3774 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_9
);
3777 static int ISO8859_10ToUTF8 (unsigned char* out
, int *outlen
,
3778 const unsigned char* in
, int *inlen
) {
3779 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_10
);
3781 static int UTF8ToISO8859_10 (unsigned char* out
, int *outlen
,
3782 const unsigned char* in
, int *inlen
) {
3783 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_10
);
3786 static int ISO8859_11ToUTF8 (unsigned char* out
, int *outlen
,
3787 const unsigned char* in
, int *inlen
) {
3788 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_11
);
3790 static int UTF8ToISO8859_11 (unsigned char* out
, int *outlen
,
3791 const unsigned char* in
, int *inlen
) {
3792 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_11
);
3795 static int ISO8859_13ToUTF8 (unsigned char* out
, int *outlen
,
3796 const unsigned char* in
, int *inlen
) {
3797 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_13
);
3799 static int UTF8ToISO8859_13 (unsigned char* out
, int *outlen
,
3800 const unsigned char* in
, int *inlen
) {
3801 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_13
);
3804 static int ISO8859_14ToUTF8 (unsigned char* out
, int *outlen
,
3805 const unsigned char* in
, int *inlen
) {
3806 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_14
);
3808 static int UTF8ToISO8859_14 (unsigned char* out
, int *outlen
,
3809 const unsigned char* in
, int *inlen
) {
3810 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_14
);
3813 static int ISO8859_15ToUTF8 (unsigned char* out
, int *outlen
,
3814 const unsigned char* in
, int *inlen
) {
3815 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_15
);
3817 static int UTF8ToISO8859_15 (unsigned char* out
, int *outlen
,
3818 const unsigned char* in
, int *inlen
) {
3819 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_15
);
3822 static int ISO8859_16ToUTF8 (unsigned char* out
, int *outlen
,
3823 const unsigned char* in
, int *inlen
) {
3824 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_16
);
3826 static int UTF8ToISO8859_16 (unsigned char* out
, int *outlen
,
3827 const unsigned char* in
, int *inlen
) {
3828 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_16
);