mscms: Fix double free on error path in EnumColorProfilesA (scan-build).
[wine.git] / libs / xml2 / encoding.c
blob52bc15aca37f481a615fbee64fb74ac5f5982204
1 /*
2 * encoding.c : implements the encoding conversion functions needed for XML
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
18 * daniel@veillard.com
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
23 #define IN_LIBXML
24 #include "libxml.h"
26 #include <string.h>
27 #include <limits.h>
28 #include <ctype.h>
29 #include <stdlib.h>
31 #ifdef LIBXML_ICONV_ENABLED
32 #include <errno.h>
33 #endif
35 #include <libxml/encoding.h>
36 #include <libxml/xmlmemory.h>
37 #ifdef LIBXML_HTML_ENABLED
38 #include <libxml/HTMLparser.h>
39 #endif
40 #include <libxml/globals.h>
41 #include <libxml/xmlerror.h>
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
47 #ifdef LIBXML_ICU_ENABLED
48 #include <unicode/ucnv.h>
49 /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50 #define ICU_PIVOT_BUF_SIZE 1024
51 typedef struct _uconv_t uconv_t;
52 struct _uconv_t {
53 UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54 UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55 UChar pivot_buf[ICU_PIVOT_BUF_SIZE];
56 UChar *pivot_source;
57 UChar *pivot_target;
59 #endif
/*
 * One entry of the user-registered alias table: maps an (upper-cased)
 * alias string to the encoding name it stands for.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;   /* encoding name the alias resolves to */
    const char *alias;  /* alias string used as lookup key */
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
68 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69 static int xmlCharEncodingAliasesNb = 0;
70 static int xmlCharEncodingAliasesMax = 0;
72 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
73 #if 0
74 #define DEBUG_ENCODING /* Define this to get encoding traces */
75 #endif
76 #else
77 #endif
79 static int xmlLittleEndian = 1;
81 /**
82 * xmlEncodingErrMemory:
83 * @extra: extra information
85 * Handle an out of memory condition
87 static void
88 xmlEncodingErrMemory(const char *extra)
90 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
93 /**
94 * xmlErrEncoding:
95 * @error: the error number
96 * @msg: the error message
98 * n encoding error
100 static void LIBXML_ATTR_FORMAT(2,0)
101 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
103 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
104 XML_FROM_I18N, error, XML_ERR_FATAL,
105 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
108 #ifdef LIBXML_ICU_ENABLED
109 static uconv_t*
110 openIcuConverter(const char* name, int toUnicode)
112 UErrorCode status = U_ZERO_ERROR;
113 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114 if (conv == NULL)
115 return NULL;
117 conv->pivot_source = conv->pivot_buf;
118 conv->pivot_target = conv->pivot_buf;
120 conv->uconv = ucnv_open(name, &status);
121 if (U_FAILURE(status))
122 goto error;
124 status = U_ZERO_ERROR;
125 if (toUnicode) {
126 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
127 NULL, NULL, NULL, &status);
129 else {
130 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
131 NULL, NULL, NULL, &status);
133 if (U_FAILURE(status))
134 goto error;
136 status = U_ZERO_ERROR;
137 conv->utf8 = ucnv_open("UTF-8", &status);
138 if (U_SUCCESS(status))
139 return conv;
141 error:
142 if (conv->uconv)
143 ucnv_close(conv->uconv);
144 xmlFree(conv);
145 return NULL;
148 static void
149 closeIcuConverter(uconv_t *conv)
151 if (conv != NULL) {
152 ucnv_close(conv->uconv);
153 ucnv_close(conv->utf8);
154 xmlFree(conv);
157 #endif /* LIBXML_ICU_ENABLED */
159 /************************************************************************
161 * Conversions To/From UTF8 encoding *
163 ************************************************************************/
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  Conversion stops while more than five bytes of
 * @out are still unused, so small output buffers convert nothing.
 *
 * Returns the number of bytes written, or -2 if a byte >= 0x80 is found.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int outmax = *outlen;
    const int inmax = *inlen;
    int n = 0;      /* bytes consumed, which equals bytes produced */

    while ((n < inmax) && (n + 5 < outmax)) {
        unsigned char ch = in[n];

        if (ch >= 0x80) {
            /* Not ASCII: report what was converted so far. */
            *outlen = n;
            *inlen = n;
            return(-2);
        }
        out[n] = ch;
        n++;
    }

    *outlen = n;
    *inlen = n;
    return(n);
}
210 #ifdef LIBXML_OUTPUT_ENABLED
212 * UTF8Toascii:
213 * @out: a pointer to an array of bytes to store the result
214 * @outlen: the length of @out
215 * @in: a pointer to an array of UTF-8 chars
216 * @inlen: the length of @in
218 * Take a block of UTF-8 chars in and try to convert it to an ASCII
219 * block of chars out.
221 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
222 * The value of @inlen after return is the number of octets consumed
223 * if the return value is positive, else unpredictable.
224 * The value of @outlen after return is the number of octets produced.
226 static int
227 UTF8Toascii(unsigned char* out, int *outlen,
228 const unsigned char* in, int *inlen) {
229 const unsigned char* processed = in;
230 const unsigned char* outend;
231 const unsigned char* outstart = out;
232 const unsigned char* instart = in;
233 const unsigned char* inend;
234 unsigned int c, d;
235 int trailing;
237 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
238 if (in == NULL) {
240 * initialization nothing to do
242 *outlen = 0;
243 *inlen = 0;
244 return(0);
246 inend = in + (*inlen);
247 outend = out + (*outlen);
248 while (in < inend) {
249 d = *in++;
250 if (d < 0x80) { c= d; trailing= 0; }
251 else if (d < 0xC0) {
252 /* trailing byte in leading position */
253 *outlen = out - outstart;
254 *inlen = processed - instart;
255 return(-2);
256 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
257 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
258 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
259 else {
260 /* no chance for this in Ascii */
261 *outlen = out - outstart;
262 *inlen = processed - instart;
263 return(-2);
266 if (inend - in < trailing) {
267 break;
270 for ( ; trailing; trailing--) {
271 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
272 break;
273 c <<= 6;
274 c |= d & 0x3F;
277 /* assertion: c is a single UTF-4 value */
278 if (c < 0x80) {
279 if (out >= outend)
280 break;
281 *out++ = c;
282 } else {
283 /* no chance for this in Ascii */
284 *outlen = out - outstart;
285 *inlen = processed - instart;
286 return(-2);
288 processed = in;
290 *outlen = out - outstart;
291 *inlen = processed - instart;
292 return(*outlen);
294 #endif /* LIBXML_OUTPUT_ENABLED */
297 * isolat1ToUTF8:
298 * @out: a pointer to an array of bytes to store the result
299 * @outlen: the length of @out
300 * @in: a pointer to an array of ISO Latin 1 chars
301 * @inlen: the length of @in
303 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
304 * block of chars out.
305 * Returns the number of bytes written if success, or -1 otherwise
306 * The value of @inlen after return is the number of octets consumed
307 * if the return value is positive, else unpredictable.
308 * The value of @outlen after return is the number of octets produced.
311 isolat1ToUTF8(unsigned char* out, int *outlen,
312 const unsigned char* in, int *inlen) {
313 unsigned char* outstart = out;
314 const unsigned char* base = in;
315 unsigned char* outend;
316 const unsigned char* inend;
317 const unsigned char* instop;
319 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
320 return(-1);
322 outend = out + *outlen;
323 inend = in + (*inlen);
324 instop = inend;
326 while ((in < inend) && (out < outend - 1)) {
327 if (*in >= 0x80) {
328 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
329 *out++ = ((*in) & 0x3F) | 0x80;
330 ++in;
332 if ((instop - in) > (outend - out)) instop = in + (outend - out);
333 while ((in < instop) && (*in < 0x80)) {
334 *out++ = *in++;
337 if ((in < inend) && (out < outend) && (*in < 0x80)) {
338 *out++ = *in++;
340 *outlen = out - outstart;
341 *inlen = in - base;
342 return(*outlen);
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit.
 *
 * Returns the number of bytes written, or -1 on bad arguments or a
 * negative length.
 * The value of *inlenb after return is the number of octets consumed.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* Copy the smaller of the two lengths. */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(count);
}
393 #ifdef LIBXML_OUTPUT_ENABLED
395 * UTF8Toisolat1:
396 * @out: a pointer to an array of bytes to store the result
397 * @outlen: the length of @out
398 * @in: a pointer to an array of UTF-8 chars
399 * @inlen: the length of @in
401 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
402 * block of chars out.
404 * Returns the number of bytes written if success, -2 if the transcoding fails,
405 or -1 otherwise
406 * The value of @inlen after return is the number of octets consumed
407 * if the return value is positive, else unpredictable.
408 * The value of @outlen after return is the number of octets produced.
411 UTF8Toisolat1(unsigned char* out, int *outlen,
412 const unsigned char* in, int *inlen) {
413 const unsigned char* processed = in;
414 const unsigned char* outend;
415 const unsigned char* outstart = out;
416 const unsigned char* instart = in;
417 const unsigned char* inend;
418 unsigned int c, d;
419 int trailing;
421 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
422 if (in == NULL) {
424 * initialization nothing to do
426 *outlen = 0;
427 *inlen = 0;
428 return(0);
430 inend = in + (*inlen);
431 outend = out + (*outlen);
432 while (in < inend) {
433 d = *in++;
434 if (d < 0x80) { c= d; trailing= 0; }
435 else if (d < 0xC0) {
436 /* trailing byte in leading position */
437 *outlen = out - outstart;
438 *inlen = processed - instart;
439 return(-2);
440 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
441 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
442 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
443 else {
444 /* no chance for this in IsoLat1 */
445 *outlen = out - outstart;
446 *inlen = processed - instart;
447 return(-2);
450 if (inend - in < trailing) {
451 break;
454 for ( ; trailing; trailing--) {
455 if (in >= inend)
456 break;
457 if (((d= *in++) & 0xC0) != 0x80) {
458 *outlen = out - outstart;
459 *inlen = processed - instart;
460 return(-2);
462 c <<= 6;
463 c |= d & 0x3F;
466 /* assertion: c is a single UTF-4 value */
467 if (c <= 0xFF) {
468 if (out >= outend)
469 break;
470 *out++ = c;
471 } else {
472 /* no chance for this in IsoLat1 */
473 *outlen = out - outstart;
474 *inlen = processed - instart;
475 return(-2);
477 processed = in;
479 *outlen = out - outstart;
480 *inlen = processed - instart;
481 return(*outlen);
483 #endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is decoded byte by byte, so it
 * needs no particular alignment and the result does not depend on the
 * host byte order.  Conversion stops while more than five bytes of
 * @out are still unused.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 * (a high surrogate not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* Only whole 16-bit units are converted; drop a dangling odd byte. */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + ((*inlenb > 0) ? *inlenb : 0);

    /*
     * The "+ 5" slack guarantees room for the longest (4 byte) UTF-8
     * sequence, so the stores below need no further bounds checks.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-unit characters */
                break;
            }
            d = in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                       bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
578 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  Output is written byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the host
 * byte order.
 *
 * Returns the number of bytes written, -1 on bad arguments, or -2
 * if the transcoding failed (malformed UTF-8).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* Usable space is a whole number of 16-bit units. */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;  /* no chance for this in UTF-16 */

        /* Sequence split across buffers: leave it for the next call. */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* Reject a malformed sequence instead of emitting garbage. */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) c;         /* low byte first */
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) hi;
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) lo;
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        }
        else
            break;      /* beyond U+10FFFF: stop without consuming it */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
689 * UTF8ToUTF16:
690 * @outb: a pointer to an array of bytes to store the result
691 * @outlen: the length of @outb
692 * @in: a pointer to an array of UTF-8 chars
693 * @inlen: the length of @in
695 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
696 * block of chars out.
698 * Returns the number of bytes written, or -1 if lack of space, or -2
699 * if the transcoding failed.
701 static int
702 UTF8ToUTF16(unsigned char* outb, int *outlen,
703 const unsigned char* in, int *inlen)
705 if (in == NULL) {
707 * initialization, add the Byte Order Mark for UTF-16LE
709 if (*outlen >= 2) {
710 outb[0] = 0xFF;
711 outb[1] = 0xFE;
712 *outlen = 2;
713 *inlen = 0;
714 #ifdef DEBUG_ENCODING
715 xmlGenericError(xmlGenericErrorContext,
716 "Added FFFE Byte Order Mark\n");
717 #endif
718 return(2);
720 *outlen = 0;
721 *inlen = 0;
722 return(0);
724 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
726 #endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.  The input is decoded byte by byte, so it
 * needs no particular alignment and the result does not depend on the
 * host byte order.  Conversion stops while more than five bytes of
 * @out are still unused.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 * (a high surrogate not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* Only whole 16-bit units are converted; drop a dangling odd byte. */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + ((*inlenb > 0) ? *inlenb : 0);

    /*
     * The "+ 5" slack guarantees room for the longest (4 byte) UTF-8
     * sequence, so the stores below need no further bounds checks.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-unit characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++=  c;                       bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
821 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  Output is written byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the host
 * byte order.
 *
 * Returns the number of bytes written, -1 on bad arguments, or -2
 * if the transcoding failed (malformed UTF-8).
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced,
 * on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* Usable space is a whole number of 16-bit units. */
    outend = outb + ((*outlen > 0) ? (*outlen / 2) * 2 : 0);
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;  /* no chance for this in UTF-16 */

        /* Sequence split across buffers: leave it for the next call. */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* Reject a malformed sequence instead of emitting garbage. */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);  /* high byte first */
            out[1] = (unsigned char) c;
            out += 2;
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) hi;
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) lo;
            out += 4;
        }
        else
            break;      /* beyond U+10FFFF: stop without consuming it */
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* Report bytes, not 16-bit units, exactly like the success path. */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
927 #endif /* LIBXML_OUTPUT_ENABLED */
929 /************************************************************************
931 * Generic encoding handling routines *
933 ************************************************************************/
936 * xmlDetectCharEncoding:
937 * @in: a pointer to the first bytes of the XML entity, must be at least
938 * 2 bytes long (at least 4 if encoding is UTF4 variant).
939 * @len: pointer to the length of the buffer
941 * Guess the encoding of the entity using the first bytes of the entity content
942 * according to the non-normative appendix F of the XML-1.0 recommendation.
944 * Returns one of the XML_CHAR_ENCODING_... values.
946 xmlCharEncoding
947 xmlDetectCharEncoding(const unsigned char* in, int len)
949 if (in == NULL)
950 return(XML_CHAR_ENCODING_NONE);
951 if (len >= 4) {
952 if ((in[0] == 0x00) && (in[1] == 0x00) &&
953 (in[2] == 0x00) && (in[3] == 0x3C))
954 return(XML_CHAR_ENCODING_UCS4BE);
955 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956 (in[2] == 0x00) && (in[3] == 0x00))
957 return(XML_CHAR_ENCODING_UCS4LE);
958 if ((in[0] == 0x00) && (in[1] == 0x00) &&
959 (in[2] == 0x3C) && (in[3] == 0x00))
960 return(XML_CHAR_ENCODING_UCS4_2143);
961 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
962 (in[2] == 0x00) && (in[3] == 0x00))
963 return(XML_CHAR_ENCODING_UCS4_3412);
964 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
965 (in[2] == 0xA7) && (in[3] == 0x94))
966 return(XML_CHAR_ENCODING_EBCDIC);
967 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
968 (in[2] == 0x78) && (in[3] == 0x6D))
969 return(XML_CHAR_ENCODING_UTF8);
971 * Although not part of the recommendation, we also
972 * attempt an "auto-recognition" of UTF-16LE and
973 * UTF-16BE encodings.
975 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
976 (in[2] == 0x3F) && (in[3] == 0x00))
977 return(XML_CHAR_ENCODING_UTF16LE);
978 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
979 (in[2] == 0x00) && (in[3] == 0x3F))
980 return(XML_CHAR_ENCODING_UTF16BE);
982 if (len >= 3) {
984 * Errata on XML-1.0 June 20 2001
985 * We now allow an UTF8 encoded BOM
987 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
988 (in[2] == 0xBF))
989 return(XML_CHAR_ENCODING_UTF8);
991 /* For UTF-16 we can recognize by the BOM */
992 if (len >= 2) {
993 if ((in[0] == 0xFE) && (in[1] == 0xFF))
994 return(XML_CHAR_ENCODING_UTF16BE);
995 if ((in[0] == 0xFF) && (in[1] == 0xFE))
996 return(XML_CHAR_ENCODING_UTF16LE);
998 return(XML_CHAR_ENCODING_NONE);
1002 * xmlCleanupEncodingAliases:
1004 * Unregisters all aliases
1006 void
1007 xmlCleanupEncodingAliases(void) {
1008 int i;
1010 if (xmlCharEncodingAliases == NULL)
1011 return;
1013 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1014 if (xmlCharEncodingAliases[i].name != NULL)
1015 xmlFree((char *) xmlCharEncodingAliases[i].name);
1016 if (xmlCharEncodingAliases[i].alias != NULL)
1017 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1019 xmlCharEncodingAliasesNb = 0;
1020 xmlCharEncodingAliasesMax = 0;
1021 xmlFree(xmlCharEncodingAliases);
1022 xmlCharEncodingAliases = NULL;
1026 * xmlGetEncodingAlias:
1027 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1029 * Lookup an encoding name for the given alias.
1031 * Returns NULL if not found, otherwise the original name
1033 const char *
1034 xmlGetEncodingAlias(const char *alias) {
1035 int i;
1036 char upper[100];
1038 if (alias == NULL)
1039 return(NULL);
1041 if (xmlCharEncodingAliases == NULL)
1042 return(NULL);
1044 for (i = 0;i < 99;i++) {
1045 upper[i] = (char) toupper((unsigned char) alias[i]);
1046 if (upper[i] == 0) break;
1048 upper[i] = 0;
1051 * Walk down the list looking for a definition of the alias
1053 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1054 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1055 return(xmlCharEncodingAliases[i].name);
1058 return(NULL);
1062 * xmlAddEncodingAlias:
1063 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1064 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1066 * Registers an alias @alias for an encoding named @name. Existing alias
1067 * will be overwritten.
1069 * Returns 0 in case of success, -1 in case of error
1072 xmlAddEncodingAlias(const char *name, const char *alias) {
1073 int i;
1074 char upper[100];
1076 if ((name == NULL) || (alias == NULL))
1077 return(-1);
1079 for (i = 0;i < 99;i++) {
1080 upper[i] = (char) toupper((unsigned char) alias[i]);
1081 if (upper[i] == 0) break;
1083 upper[i] = 0;
1085 if (xmlCharEncodingAliases == NULL) {
1086 xmlCharEncodingAliasesNb = 0;
1087 xmlCharEncodingAliasesMax = 20;
1088 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1089 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1090 if (xmlCharEncodingAliases == NULL)
1091 return(-1);
1092 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1093 xmlCharEncodingAliasesMax *= 2;
1094 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1095 xmlRealloc(xmlCharEncodingAliases,
1096 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1099 * Walk down the list looking for a definition of the alias
1101 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1102 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1104 * Replace the definition.
1106 xmlFree((char *) xmlCharEncodingAliases[i].name);
1107 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1108 return(0);
1112 * Add the definition
1114 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1115 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1116 xmlCharEncodingAliasesNb++;
1117 return(0);
1121 * xmlDelEncodingAlias:
1122 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1124 * Unregisters an encoding alias @alias
1126 * Returns 0 in case of success, -1 in case of error
1129 xmlDelEncodingAlias(const char *alias) {
1130 int i;
1132 if (alias == NULL)
1133 return(-1);
1135 if (xmlCharEncodingAliases == NULL)
1136 return(-1);
1138 * Walk down the list looking for a definition of the alias
1140 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1141 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1142 xmlFree((char *) xmlCharEncodingAliases[i].name);
1143 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1144 xmlCharEncodingAliasesNb--;
1145 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1146 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1147 return(0);
1150 return(-1);
1154 * xmlParseCharEncoding:
1155 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1157 * Compare the string to the encoding schemes already known. Note
1158 * that the comparison is case insensitive accordingly to the section
1159 * [XML] 4.3.3 Character Encoding in Entities.
1161 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1162 * if not recognized.
1164 xmlCharEncoding
1165 xmlParseCharEncoding(const char* name)
1167 const char *alias;
1168 char upper[500];
1169 int i;
1171 if (name == NULL)
1172 return(XML_CHAR_ENCODING_NONE);
1175 * Do the alias resolution
1177 alias = xmlGetEncodingAlias(name);
1178 if (alias != NULL)
1179 name = alias;
1181 for (i = 0;i < 499;i++) {
1182 upper[i] = (char) toupper((unsigned char) name[i]);
1183 if (upper[i] == 0) break;
1185 upper[i] = 0;
1187 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1188 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1189 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1192 * NOTE: if we were able to parse this, the endianness of UTF16 is
1193 * already found and in use
1195 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1196 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1198 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1199 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1200 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1203 * NOTE: if we were able to parse this, the endianness of UCS4 is
1204 * already found and in use
1206 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1207 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1208 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1211 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1212 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1213 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1215 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1216 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1217 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1219 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1220 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1221 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1222 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1223 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1224 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1225 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1227 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1228 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1229 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1231 #ifdef DEBUG_ENCODING
1232 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1233 #endif
1234 return(XML_CHAR_ENCODING_ERROR);
1238 * xmlGetCharEncodingName:
1239 * @enc: the encoding
1241 * The "canonical" name for XML encoding.
1242 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1243 * Section 4.3.3 Character Encoding in Entities
1245 * Returns the canonical name for the given encoding
1248 const char*
1249 xmlGetCharEncodingName(xmlCharEncoding enc) {
1250 switch (enc) {
1251 case XML_CHAR_ENCODING_ERROR:
1252 return(NULL);
1253 case XML_CHAR_ENCODING_NONE:
1254 return(NULL);
1255 case XML_CHAR_ENCODING_UTF8:
1256 return("UTF-8");
1257 case XML_CHAR_ENCODING_UTF16LE:
1258 return("UTF-16");
1259 case XML_CHAR_ENCODING_UTF16BE:
1260 return("UTF-16");
1261 case XML_CHAR_ENCODING_EBCDIC:
1262 return("EBCDIC");
1263 case XML_CHAR_ENCODING_UCS4LE:
1264 return("ISO-10646-UCS-4");
1265 case XML_CHAR_ENCODING_UCS4BE:
1266 return("ISO-10646-UCS-4");
1267 case XML_CHAR_ENCODING_UCS4_2143:
1268 return("ISO-10646-UCS-4");
1269 case XML_CHAR_ENCODING_UCS4_3412:
1270 return("ISO-10646-UCS-4");
1271 case XML_CHAR_ENCODING_UCS2:
1272 return("ISO-10646-UCS-2");
1273 case XML_CHAR_ENCODING_8859_1:
1274 return("ISO-8859-1");
1275 case XML_CHAR_ENCODING_8859_2:
1276 return("ISO-8859-2");
1277 case XML_CHAR_ENCODING_8859_3:
1278 return("ISO-8859-3");
1279 case XML_CHAR_ENCODING_8859_4:
1280 return("ISO-8859-4");
1281 case XML_CHAR_ENCODING_8859_5:
1282 return("ISO-8859-5");
1283 case XML_CHAR_ENCODING_8859_6:
1284 return("ISO-8859-6");
1285 case XML_CHAR_ENCODING_8859_7:
1286 return("ISO-8859-7");
1287 case XML_CHAR_ENCODING_8859_8:
1288 return("ISO-8859-8");
1289 case XML_CHAR_ENCODING_8859_9:
1290 return("ISO-8859-9");
1291 case XML_CHAR_ENCODING_2022_JP:
1292 return("ISO-2022-JP");
1293 case XML_CHAR_ENCODING_SHIFT_JIS:
1294 return("Shift-JIS");
1295 case XML_CHAR_ENCODING_EUC_JP:
1296 return("EUC-JP");
1297 case XML_CHAR_ENCODING_ASCII:
1298 return(NULL);
1300 return(NULL);
1303 /************************************************************************
1305 * Char encoding handlers *
1307 ************************************************************************/
#if defined(LIBXML_ISO8859X_ENABLED) && \
    !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)

/*
 * Forward declarations of the ISO-8859-<part> <=> UTF-8 converter pairs.
 * They are only declared when ISO-8859-X support is enabled and neither
 * iconv nor ICU is compiled in.
 */
#define DECLARE_ISO_FUNCS(part) \
    static int ISO8859_##part##ToUTF8(unsigned char* out, int *outlen, \
                                      const unsigned char* in, int *inlen); \
    static int UTF8ToISO8859_##part(unsigned char* out, int *outlen, \
                                    const unsigned char* in, int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1337 #ifdef LIBXML_ICONV_ENABLED
1338 #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
1339 #else
1340 #define EMPTY_ICONV
1341 #endif
1343 #ifdef LIBXML_ICU_ENABLED
1344 #define EMPTY_UCONV , NULL, NULL
1345 #else
1346 #define EMPTY_UCONV
1347 #endif
1349 #define MAKE_HANDLER(name, in, out) \
1350 { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1352 static const xmlCharEncodingHandler defaultHandlers[] = {
1353 MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
1354 #ifdef LIBXML_OUTPUT_ENABLED
1355 ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1356 ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1357 ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1358 ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1359 ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1360 ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1361 #ifdef LIBXML_HTML_ENABLED
1362 ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1363 #endif
1364 #else
1365 ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1366 ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1367 ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1368 ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1369 ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1370 ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1371 #endif /* LIBXML_OUTPUT_ENABLED */
1373 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1374 defined(LIBXML_ISO8859X_ENABLED)
1375 ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1376 ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1377 ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1378 ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1379 ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1380 ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1381 ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1382 ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1383 ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1384 ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1385 ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1386 ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1387 ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1388 ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1389 #endif
1392 #define NUM_DEFAULT_HANDLERS \
1393 (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1395 static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
1396 static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];
1398 /* the size should be growable, but it's not a big deal ... */
1399 #define MAX_ENCODING_HANDLERS 50
1400 static xmlCharEncodingHandlerPtr *handlers = NULL;
1401 static int nbCharEncodingHandler = 0;
1404 * xmlNewCharEncodingHandler:
1405 * @name: the encoding name, in UTF-8 format (ASCII actually)
1406 * @input: the xmlCharEncodingInputFunc to read that encoding
1407 * @output: the xmlCharEncodingOutputFunc to write that encoding
1409 * Create and registers an xmlCharEncodingHandler.
1411 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1413 xmlCharEncodingHandlerPtr
1414 xmlNewCharEncodingHandler(const char *name,
1415 xmlCharEncodingInputFunc input,
1416 xmlCharEncodingOutputFunc output) {
1417 xmlCharEncodingHandlerPtr handler;
1418 const char *alias;
1419 char upper[500];
1420 int i;
1421 char *up = NULL;
1424 * Do the alias resolution
1426 alias = xmlGetEncodingAlias(name);
1427 if (alias != NULL)
1428 name = alias;
1431 * Keep only the uppercase version of the encoding.
1433 if (name == NULL) {
1434 xmlEncodingErr(XML_I18N_NO_NAME,
1435 "xmlNewCharEncodingHandler : no name !\n", NULL);
1436 return(NULL);
1438 for (i = 0;i < 499;i++) {
1439 upper[i] = (char) toupper((unsigned char) name[i]);
1440 if (upper[i] == 0) break;
1442 upper[i] = 0;
1443 up = xmlMemStrdup(upper);
1444 if (up == NULL) {
1445 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1446 return(NULL);
1450 * allocate and fill-up an handler block.
1452 handler = (xmlCharEncodingHandlerPtr)
1453 xmlMalloc(sizeof(xmlCharEncodingHandler));
1454 if (handler == NULL) {
1455 xmlFree(up);
1456 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1457 return(NULL);
1459 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1460 handler->input = input;
1461 handler->output = output;
1462 handler->name = up;
1464 #ifdef LIBXML_ICONV_ENABLED
1465 handler->iconv_in = NULL;
1466 handler->iconv_out = NULL;
1467 #endif
1468 #ifdef LIBXML_ICU_ENABLED
1469 handler->uconv_in = NULL;
1470 handler->uconv_out = NULL;
1471 #endif
1474 * registers and returns the handler.
1476 xmlRegisterCharEncodingHandler(handler);
1477 #ifdef DEBUG_ENCODING
1478 xmlGenericError(xmlGenericErrorContext,
1479 "Registered encoding handler for %s\n", name);
1480 #endif
1481 return(handler);
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept as a thin wrapper: the only thing it does is call xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void)
{
    xmlInitParser();
}
1495 * xmlInitEncodingInternal:
1497 * Initialize the char encoding support.
1499 void
1500 xmlInitEncodingInternal(void) {
1501 unsigned short int tst = 0x1234;
1502 unsigned char *ptr = (unsigned char *) &tst;
1504 if (*ptr == 0x12) xmlLittleEndian = 0;
1505 else if (*ptr == 0x34) xmlLittleEndian = 1;
1506 else {
1507 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1508 "Odd problem at endianness detection\n", NULL);
1513 * xmlCleanupCharEncodingHandlers:
1515 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1516 * to free global state but see the warnings there. xmlCleanupParser
1517 * should be only called once at program exit. In most cases, you don't
1518 * have call cleanup functions at all.
1520 * Cleanup the memory allocated for the char encoding support, it
1521 * unregisters all the encoding handlers and the aliases.
1523 void
1524 xmlCleanupCharEncodingHandlers(void) {
1525 xmlCleanupEncodingAliases();
1527 if (handlers == NULL) return;
1529 for (;nbCharEncodingHandler > 0;) {
1530 nbCharEncodingHandler--;
1531 if (handlers[nbCharEncodingHandler] != NULL) {
1532 if (handlers[nbCharEncodingHandler]->name != NULL)
1533 xmlFree(handlers[nbCharEncodingHandler]->name);
1534 xmlFree(handlers[nbCharEncodingHandler]);
1537 xmlFree(handlers);
1538 handlers = NULL;
1539 nbCharEncodingHandler = 0;
1543 * xmlRegisterCharEncodingHandler:
1544 * @handler: the xmlCharEncodingHandlerPtr handler block
1546 * Register the char encoding handler, surprising, isn't it ?
1548 void
1549 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1550 if (handler == NULL) {
1551 xmlEncodingErr(XML_I18N_NO_HANDLER,
1552 "xmlRegisterCharEncodingHandler: NULL handler\n", NULL);
1553 return;
1555 if (handlers == NULL) {
1556 handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1557 if (handlers == NULL) {
1558 xmlEncodingErrMemory("allocating handler table");
1559 goto free_handler;
1563 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1564 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1565 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1566 "MAX_ENCODING_HANDLERS");
1567 goto free_handler;
1569 handlers[nbCharEncodingHandler++] = handler;
1570 return;
1572 free_handler:
1573 if (handler != NULL) {
1574 if (handler->name != NULL) {
1575 xmlFree(handler->name);
1577 xmlFree(handler);
1582 * xmlGetCharEncodingHandler:
1583 * @enc: an xmlCharEncoding value.
1585 * Search in the registered set the handler able to read/write that encoding.
1587 * Returns the handler or NULL if not found
1589 xmlCharEncodingHandlerPtr
1590 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1591 xmlCharEncodingHandlerPtr handler;
1593 switch (enc) {
1594 case XML_CHAR_ENCODING_ERROR:
1595 return(NULL);
1596 case XML_CHAR_ENCODING_NONE:
1597 return(NULL);
1598 case XML_CHAR_ENCODING_UTF8:
1599 return(NULL);
1600 case XML_CHAR_ENCODING_UTF16LE:
1601 return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1602 case XML_CHAR_ENCODING_UTF16BE:
1603 return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1604 case XML_CHAR_ENCODING_EBCDIC:
1605 handler = xmlFindCharEncodingHandler("EBCDIC");
1606 if (handler != NULL) return(handler);
1607 handler = xmlFindCharEncodingHandler("ebcdic");
1608 if (handler != NULL) return(handler);
1609 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1610 if (handler != NULL) return(handler);
1611 handler = xmlFindCharEncodingHandler("IBM-037");
1612 if (handler != NULL) return(handler);
1613 break;
1614 case XML_CHAR_ENCODING_UCS4BE:
1615 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1616 if (handler != NULL) return(handler);
1617 handler = xmlFindCharEncodingHandler("UCS-4");
1618 if (handler != NULL) return(handler);
1619 handler = xmlFindCharEncodingHandler("UCS4");
1620 if (handler != NULL) return(handler);
1621 break;
1622 case XML_CHAR_ENCODING_UCS4LE:
1623 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1624 if (handler != NULL) return(handler);
1625 handler = xmlFindCharEncodingHandler("UCS-4");
1626 if (handler != NULL) return(handler);
1627 handler = xmlFindCharEncodingHandler("UCS4");
1628 if (handler != NULL) return(handler);
1629 break;
1630 case XML_CHAR_ENCODING_UCS4_2143:
1631 break;
1632 case XML_CHAR_ENCODING_UCS4_3412:
1633 break;
1634 case XML_CHAR_ENCODING_UCS2:
1635 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1636 if (handler != NULL) return(handler);
1637 handler = xmlFindCharEncodingHandler("UCS-2");
1638 if (handler != NULL) return(handler);
1639 handler = xmlFindCharEncodingHandler("UCS2");
1640 if (handler != NULL) return(handler);
1641 break;
1644 * We used to keep ISO Latin encodings native in the
1645 * generated data. This led to so many problems that
1646 * this has been removed. One can still change this
1647 * back by registering no-ops encoders for those
1649 case XML_CHAR_ENCODING_8859_1:
1650 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1651 if (handler != NULL) return(handler);
1652 break;
1653 case XML_CHAR_ENCODING_8859_2:
1654 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1655 if (handler != NULL) return(handler);
1656 break;
1657 case XML_CHAR_ENCODING_8859_3:
1658 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1659 if (handler != NULL) return(handler);
1660 break;
1661 case XML_CHAR_ENCODING_8859_4:
1662 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1663 if (handler != NULL) return(handler);
1664 break;
1665 case XML_CHAR_ENCODING_8859_5:
1666 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1667 if (handler != NULL) return(handler);
1668 break;
1669 case XML_CHAR_ENCODING_8859_6:
1670 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1671 if (handler != NULL) return(handler);
1672 break;
1673 case XML_CHAR_ENCODING_8859_7:
1674 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1675 if (handler != NULL) return(handler);
1676 break;
1677 case XML_CHAR_ENCODING_8859_8:
1678 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1679 if (handler != NULL) return(handler);
1680 break;
1681 case XML_CHAR_ENCODING_8859_9:
1682 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1683 if (handler != NULL) return(handler);
1684 break;
1687 case XML_CHAR_ENCODING_2022_JP:
1688 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1689 if (handler != NULL) return(handler);
1690 break;
1691 case XML_CHAR_ENCODING_SHIFT_JIS:
1692 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1693 if (handler != NULL) return(handler);
1694 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1695 if (handler != NULL) return(handler);
1696 handler = xmlFindCharEncodingHandler("Shift_JIS");
1697 if (handler != NULL) return(handler);
1698 break;
1699 case XML_CHAR_ENCODING_EUC_JP:
1700 handler = xmlFindCharEncodingHandler("EUC-JP");
1701 if (handler != NULL) return(handler);
1702 break;
1703 default:
1704 break;
1707 #ifdef DEBUG_ENCODING
1708 xmlGenericError(xmlGenericErrorContext,
1709 "No handler found for encoding %d\n", enc);
1710 #endif
1711 return(NULL);
1715 * xmlFindCharEncodingHandler:
1716 * @name: a string describing the char encoding.
1718 * Search in the registered set the handler able to read/write that encoding
1719 * or create a new one.
1721 * Returns the handler or NULL if not found
1723 xmlCharEncodingHandlerPtr
1724 xmlFindCharEncodingHandler(const char *name) {
1725 const char *nalias;
1726 const char *norig;
1727 xmlCharEncoding alias;
1728 #ifdef LIBXML_ICONV_ENABLED
1729 xmlCharEncodingHandlerPtr enc;
1730 iconv_t icv_in, icv_out;
1731 #endif /* LIBXML_ICONV_ENABLED */
1732 #ifdef LIBXML_ICU_ENABLED
1733 xmlCharEncodingHandlerPtr encu;
1734 uconv_t *ucv_in, *ucv_out;
1735 #endif /* LIBXML_ICU_ENABLED */
1736 char upper[100];
1737 int i;
1739 if (name == NULL) return(NULL);
1740 if (name[0] == 0) return(NULL);
1743 * Do the alias resolution
1745 norig = name;
1746 nalias = xmlGetEncodingAlias(name);
1747 if (nalias != NULL)
1748 name = nalias;
1751 * Check first for directly registered encoding names
1753 for (i = 0;i < 99;i++) {
1754 upper[i] = (char) toupper((unsigned char) name[i]);
1755 if (upper[i] == 0) break;
1757 upper[i] = 0;
1759 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1760 if (strcmp(upper, defaultHandlers[i].name) == 0)
1761 return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
1764 if (handlers != NULL) {
1765 for (i = 0;i < nbCharEncodingHandler; i++) {
1766 if (!strcmp(upper, handlers[i]->name)) {
1767 #ifdef DEBUG_ENCODING
1768 xmlGenericError(xmlGenericErrorContext,
1769 "Found registered handler for encoding %s\n", name);
1770 #endif
1771 return(handlers[i]);
1776 #ifdef LIBXML_ICONV_ENABLED
1777 /* check whether iconv can handle this */
1778 icv_in = iconv_open("UTF-8", name);
1779 icv_out = iconv_open(name, "UTF-8");
1780 if (icv_in == (iconv_t) -1) {
1781 icv_in = iconv_open("UTF-8", upper);
1783 if (icv_out == (iconv_t) -1) {
1784 icv_out = iconv_open(upper, "UTF-8");
1786 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1787 enc = (xmlCharEncodingHandlerPtr)
1788 xmlMalloc(sizeof(xmlCharEncodingHandler));
1789 if (enc == NULL) {
1790 iconv_close(icv_in);
1791 iconv_close(icv_out);
1792 return(NULL);
1794 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1795 enc->name = xmlMemStrdup(name);
1796 if (enc->name == NULL) {
1797 xmlFree(enc);
1798 iconv_close(icv_in);
1799 iconv_close(icv_out);
1800 return(NULL);
1802 enc->input = NULL;
1803 enc->output = NULL;
1804 enc->iconv_in = icv_in;
1805 enc->iconv_out = icv_out;
1806 #ifdef DEBUG_ENCODING
1807 xmlGenericError(xmlGenericErrorContext,
1808 "Found iconv handler for encoding %s\n", name);
1809 #endif
1810 return enc;
1811 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1812 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1813 "iconv : problems with filters for '%s'\n", name);
1814 if (icv_in != (iconv_t) -1)
1815 iconv_close(icv_in);
1816 else
1817 iconv_close(icv_out);
1819 #endif /* LIBXML_ICONV_ENABLED */
1820 #ifdef LIBXML_ICU_ENABLED
1821 /* check whether icu can handle this */
1822 ucv_in = openIcuConverter(name, 1);
1823 ucv_out = openIcuConverter(name, 0);
1824 if (ucv_in != NULL && ucv_out != NULL) {
1825 encu = (xmlCharEncodingHandlerPtr)
1826 xmlMalloc(sizeof(xmlCharEncodingHandler));
1827 if (encu == NULL) {
1828 closeIcuConverter(ucv_in);
1829 closeIcuConverter(ucv_out);
1830 return(NULL);
1832 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1833 encu->name = xmlMemStrdup(name);
1834 if (encu->name == NULL) {
1835 xmlFree(encu);
1836 closeIcuConverter(ucv_in);
1837 closeIcuConverter(ucv_out);
1838 return(NULL);
1840 encu->input = NULL;
1841 encu->output = NULL;
1842 encu->uconv_in = ucv_in;
1843 encu->uconv_out = ucv_out;
1844 #ifdef DEBUG_ENCODING
1845 xmlGenericError(xmlGenericErrorContext,
1846 "Found ICU converter handler for encoding %s\n", name);
1847 #endif
1848 return encu;
1849 } else if (ucv_in != NULL || ucv_out != NULL) {
1850 closeIcuConverter(ucv_in);
1851 closeIcuConverter(ucv_out);
1852 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1853 "ICU converter : problems with filters for '%s'\n", name);
1855 #endif /* LIBXML_ICU_ENABLED */
1857 #ifdef DEBUG_ENCODING
1858 xmlGenericError(xmlGenericErrorContext,
1859 "No handler found for encoding %s\n", name);
1860 #endif
1863 * Fallback using the canonical names
1865 alias = xmlParseCharEncoding(norig);
1866 if (alias != XML_CHAR_ENCODING_ERROR) {
1867 const char* canon;
1868 canon = xmlGetCharEncodingName(alias);
1869 if ((canon != NULL) && (strcmp(name, canon))) {
1870 return(xmlFindCharEncodingHandler(canon));
1874 /* If "none of the above", give up */
1875 return(NULL);
1878 /************************************************************************
1880 * ICONV based generic conversion functions *
1882 ************************************************************************/
1884 #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd: iconv converter data structure
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @in: a pointer to an array of input bytes
 * @inlen: the length of @in
 *
 * Run one iconv() call over the input and translate its outcome into
 * the return codes used by the encoding layer.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or when one of the pointer arguments is NULL,
 *        in which case *@outlen is set to 0 if possible), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);

    /* iconv leaves the unused amounts behind: turn them into used counts */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != (size_t) -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL (incomplete sequence) and anything else map to -3 */
    return -3;
}
1945 #endif /* LIBXML_ICONV_ENABLED */
1947 /************************************************************************
1949 * ICU based generic conversion functions *
1951 ************************************************************************/
1953 #ifdef LIBXML_ICU_ENABLED
1955 * xmlUconvWrapper:
1956 * @cd: ICU uconverter data structure
1957 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1958 * @out: a pointer to an array of bytes to store the result
1959 * @outlen: the length of @out
1960 * @in: a pointer to an array of input bytes
1961 * @inlen: the length of @in
1962 * @flush: if true, indicates end of input
1964 * Returns 0 if success, or
1965 * -1 by lack of space, or
1966 * -2 if the transcoding fails (for *in is not valid utf8 string or
1967 * the result of transformation can't fit into the encoding we want), or
1968 * -3 if there the last byte can't form a single output char.
1970 * The value of @inlen after return is the number of octets consumed
1971 * as the return value is positive, else unpredictable.
1972 * The value of @outlen after return is the number of octets produced.
1974 static int
1975 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1976 const unsigned char *in, int *inlen, int flush) {
1977 const char *ucv_in = (const char *) in;
1978 char *ucv_out = (char *) out;
1979 UErrorCode err = U_ZERO_ERROR;
1981 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1982 if (outlen != NULL) *outlen = 0;
1983 return(-1);
1986 if (toUnicode) {
1987 /* encoding => UTF-16 => UTF-8 */
1988 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1989 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1990 &cd->pivot_source, &cd->pivot_target,
1991 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1992 } else {
1993 /* UTF-8 => UTF-16 => encoding */
1994 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1995 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1996 &cd->pivot_source, &cd->pivot_target,
1997 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1999 *inlen = ucv_in - (const char*) in;
2000 *outlen = ucv_out - (char *) out;
2001 if (U_SUCCESS(err)) {
2002 /* reset pivot buf if this is the last call for input (flush==TRUE) */
2003 if (flush)
2004 cd->pivot_source = cd->pivot_target = cd->pivot_buf;
2005 return 0;
2007 if (err == U_BUFFER_OVERFLOW_ERROR)
2008 return -1;
2009 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
2010 return -2;
2011 return -3;
2013 #endif /* LIBXML_ICU_ENABLED */
2015 /************************************************************************
2017 * The real API used by libxml for on-the-fly conversion *
2019 ************************************************************************/
2022 * xmlEncInputChunk:
2023 * @handler: encoding handler
2024 * @out: a pointer to an array of bytes to store the result
2025 * @outlen: the length of @out
2026 * @in: a pointer to an array of input bytes
2027 * @inlen: the length of @in
2028 * @flush: flush (ICU-related)
2030 * Returns 0 if success, or
2031 * -1 by lack of space, or
2032 * -2 if the transcoding fails (for *in is not valid utf8 string or
2033 * the result of transformation can't fit into the encoding we want), or
2034 * -3 if there the last byte can't form a single output char.
2036 * The value of @inlen after return is the number of octets consumed
2037 * as the return value is 0, else unpredictable.
2038 * The value of @outlen after return is the number of octets produced.
2041 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2042 int *outlen, const unsigned char *in, int *inlen, int flush) {
2043 int ret;
2044 (void)flush;
2046 if (handler->input != NULL) {
2047 ret = handler->input(out, outlen, in, inlen);
2048 if (ret > 0)
2049 ret = 0;
2051 #ifdef LIBXML_ICONV_ENABLED
2052 else if (handler->iconv_in != NULL) {
2053 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2055 #endif /* LIBXML_ICONV_ENABLED */
2056 #ifdef LIBXML_ICU_ENABLED
2057 else if (handler->uconv_in != NULL) {
2058 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
2059 flush);
2061 #endif /* LIBXML_ICU_ENABLED */
2062 else {
2063 *outlen = 0;
2064 *inlen = 0;
2065 ret = -2;
2068 return(ret);
2072 * xmlEncOutputChunk:
2073 * @handler: encoding handler
2074 * @out: a pointer to an array of bytes to store the result
2075 * @outlen: the length of @out
2076 * @in: a pointer to an array of input bytes
2077 * @inlen: the length of @in
2079 * Returns 0 if success, or
2080 * -1 by lack of space, or
2081 * -2 if the transcoding fails (for *in is not valid utf8 string or
2082 * the result of transformation can't fit into the encoding we want), or
2083 * -3 if there the last byte can't form a single output char.
2084 * -4 if no output function was found.
2086 * The value of @inlen after return is the number of octets consumed
2087 * as the return value is 0, else unpredictable.
2088 * The value of @outlen after return is the number of octets produced.
2090 static int
2091 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2092 int *outlen, const unsigned char *in, int *inlen) {
2093 int ret;
2095 if (handler->output != NULL) {
2096 ret = handler->output(out, outlen, in, inlen);
2097 if (ret > 0)
2098 ret = 0;
2100 #ifdef LIBXML_ICONV_ENABLED
2101 else if (handler->iconv_out != NULL) {
2102 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2104 #endif /* LIBXML_ICONV_ENABLED */
2105 #ifdef LIBXML_ICU_ENABLED
2106 else if (handler->uconv_out != NULL) {
2107 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2110 #endif /* LIBXML_ICU_ENABLED */
2111 else {
2112 *outlen = 0;
2113 *inlen = 0;
2114 ret = -4;
2117 return(ret);
2121 * xmlCharEncFirstLine:
2122 * @handler: char encoding transformation data structure
2123 * @out: an xmlBuffer for the output.
2124 * @in: an xmlBuffer for the input
2126 * DEPERECATED: Don't use.
2129 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2130 xmlBufferPtr in) {
2131 return(xmlCharEncInFunc(handler, out, in));
2135 * xmlCharEncInput:
2136 * @input: a parser input buffer
2137 * @flush: try to flush all the raw buffer
2139 * Generic front-end for the encoding handler on parser input
2141 * Returns the number of byte written if success, or
2142 * -1 general error
2143 * -2 if the transcoding fails (for *in is not valid utf8 string or
2144 * the result of transformation can't fit into the encoding we want), or
2147 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2149 int ret;
2150 size_t written;
2151 size_t toconv;
2152 int c_in;
2153 int c_out;
2154 xmlBufPtr in;
2155 xmlBufPtr out;
2157 if ((input == NULL) || (input->encoder == NULL) ||
2158 (input->buffer == NULL) || (input->raw == NULL))
2159 return (-1);
2160 out = input->buffer;
2161 in = input->raw;
2163 toconv = xmlBufUse(in);
2164 if (toconv == 0)
2165 return (0);
2166 if ((toconv > 64 * 1024) && (flush == 0))
2167 toconv = 64 * 1024;
2168 written = xmlBufAvail(out);
2169 if (toconv * 2 >= written) {
2170 if (xmlBufGrow(out, toconv * 2) < 0)
2171 return (-1);
2172 written = xmlBufAvail(out);
2174 if ((written > 128 * 1024) && (flush == 0))
2175 written = 128 * 1024;
2177 c_in = toconv;
2178 c_out = written;
2179 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2180 xmlBufContent(in), &c_in, flush);
2181 xmlBufShrink(in, c_in);
2182 xmlBufAddLen(out, c_out);
2183 if (ret == -1)
2184 ret = -3;
2186 switch (ret) {
2187 case 0:
2188 #ifdef DEBUG_ENCODING
2189 xmlGenericError(xmlGenericErrorContext,
2190 "converted %d bytes to %d bytes of input\n",
2191 c_in, c_out);
2192 #endif
2193 break;
2194 case -1:
2195 #ifdef DEBUG_ENCODING
2196 xmlGenericError(xmlGenericErrorContext,
2197 "converted %d bytes to %d bytes of input, %d left\n",
2198 c_in, c_out, (int)xmlBufUse(in));
2199 #endif
2200 break;
2201 case -3:
2202 #ifdef DEBUG_ENCODING
2203 xmlGenericError(xmlGenericErrorContext,
2204 "converted %d bytes to %d bytes of input, %d left\n",
2205 c_in, c_out, (int)xmlBufUse(in));
2206 #endif
2207 break;
2208 case -2: {
2209 char buf[50];
2210 const xmlChar *content = xmlBufContent(in);
2212 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2213 content[0], content[1],
2214 content[2], content[3]);
2215 buf[49] = 0;
2216 xmlEncodingErr(XML_I18N_CONV_FAILED,
2217 "input conversion failed due to input error, bytes %s\n",
2218 buf);
2222 * Ignore when input buffer is not on a boundary
2224 if (ret == -3)
2225 ret = 0;
2226 return (c_out? c_out : ret);
2230 * xmlCharEncInFunc:
2231 * @handler: char encoding transformation data structure
2232 * @out: an xmlBuffer for the output.
2233 * @in: an xmlBuffer for the input
2235 * Generic front-end for the encoding handler input function
2237 * Returns the number of byte written if success, or
2238 * -1 general error
2239 * -2 if the transcoding fails (for *in is not valid utf8 string or
2240 * the result of transformation can't fit into the encoding we want), or
2243 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2244 xmlBufferPtr in)
2246 int ret;
2247 int written;
2248 int toconv;
2250 if (handler == NULL)
2251 return (-1);
2252 if (out == NULL)
2253 return (-1);
2254 if (in == NULL)
2255 return (-1);
2257 toconv = in->use;
2258 if (toconv == 0)
2259 return (0);
2260 written = out->size - out->use -1; /* count '\0' */
2261 if (toconv * 2 >= written) {
2262 xmlBufferGrow(out, out->size + toconv * 2);
2263 written = out->size - out->use - 1;
2265 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2266 in->content, &toconv, 1);
2267 xmlBufferShrink(in, toconv);
2268 out->use += written;
2269 out->content[out->use] = 0;
2270 if (ret == -1)
2271 ret = -3;
2273 switch (ret) {
2274 case 0:
2275 #ifdef DEBUG_ENCODING
2276 xmlGenericError(xmlGenericErrorContext,
2277 "converted %d bytes to %d bytes of input\n",
2278 toconv, written);
2279 #endif
2280 break;
2281 case -1:
2282 #ifdef DEBUG_ENCODING
2283 xmlGenericError(xmlGenericErrorContext,
2284 "converted %d bytes to %d bytes of input, %d left\n",
2285 toconv, written, in->use);
2286 #endif
2287 break;
2288 case -3:
2289 #ifdef DEBUG_ENCODING
2290 xmlGenericError(xmlGenericErrorContext,
2291 "converted %d bytes to %d bytes of input, %d left\n",
2292 toconv, written, in->use);
2293 #endif
2294 break;
2295 case -2: {
2296 char buf[50];
2298 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2299 in->content[0], in->content[1],
2300 in->content[2], in->content[3]);
2301 buf[49] = 0;
2302 xmlEncodingErr(XML_I18N_CONV_FAILED,
2303 "input conversion failed due to input error, bytes %s\n",
2304 buf);
2308 * Ignore when input buffer is not on a boundary
2310 if (ret == -3)
2311 ret = 0;
2312 return (written? written : ret);
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output.
 * A first call with @init == 1 has to be made to let non-stateless
 * encodings initialise their state or emit a prefix (like the BOM in
 * UTF16).  Otherwise data from output->buffer is converted into
 * output->conv in chunks of at most 64 KiB until the source is empty or
 * an error stops the conversion.
 * When the encoder rejects a UTF-8 sequence, that character is removed
 * from the source and its "&#N;" character reference is encoded instead.
 *
 * Returns the number of bytes written when that is non-zero, otherwise
 * the last conversion status (-4 is reported as -1), or
 *     -1 when an argument is missing
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    xmlBufPtr src;
    xmlBufPtr dst;
    size_t room;
    size_t pending;
    int total = 0;
    int status;
    int nIn;
    int nOut;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return (-1);
    dst = output->conv;
    src = output->buffer;

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        nIn = 0;
        nOut = xmlBufAvail(dst);
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(dst), &nOut,
                          NULL, &nIn);
        xmlBufAddLen(dst, nOut);
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
#endif
        return(nOut);
    }

    /* Each pass converts one chunk; "continue" starts the next one. */
    for (;;) {
        room = xmlBufAvail(dst);

        /*
         * Conversion itself.
         */
        pending = xmlBufUse(src);
        if (pending == 0)
            return (total);
        if (pending > 64 * 1024)
            pending = 64 * 1024;
        if (pending * 4 >= room) {
            xmlBufGrow(dst, pending * 4);
            room = xmlBufAvail(dst);
        }
        if (room > 256 * 1024)
            room = 256 * 1024;

        nIn = pending;
        nOut = room;
        status = xmlEncOutputChunk(output->encoder, xmlBufEnd(dst), &nOut,
                                   xmlBufContent(src), &nIn);
        xmlBufShrink(src, nIn);
        xmlBufAddLen(dst, nOut);
        total += nOut;
        if (status == -1) {
            /* Can be a limitation of iconv or uconv */
            if (nOut > 0)
                continue;
            status = -3;
        }

        /*
         * Attempt to handle error cases
         */
        switch (status) {
            case 0:
#ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "converted %d bytes to %d bytes of output\n",
                        nIn, nOut);
#endif
                break;
            case -1:
#ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "output conversion failed by lack of space\n");
#endif
                break;
            case -3:
#ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                        nIn, nOut, (int) xmlBufUse(src));
#endif
                break;
            case -4:
                xmlEncodingErr(XML_I18N_NO_OUTPUT,
                               "xmlCharEncOutFunc: no output function !\n", NULL);
                status = -1;
                break;
            case -2: {
                xmlChar charref[20];
                int seqLen = xmlBufUse(src);
                xmlChar *content = xmlBufContent(src);
                int codepoint, charrefLen;

                codepoint = xmlGetUTF8Char(content, &seqLen);
                if (codepoint <= 0)
                    break;

#ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "handling output conversion error\n");
                xmlGenericError(xmlGenericErrorContext,
                        "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        content[0], content[1],
                        content[2], content[3]);
#endif
                /*
                 * Removes the UTF8 sequence, and replace it by a charref
                 * and continue the transcoding phase, hoping the error
                 * did not mangle the encoder state.
                 */
                charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                                      "&#%d;", codepoint);
                xmlBufShrink(src, seqLen);
                xmlBufGrow(dst, charrefLen * 4);
                nOut = xmlBufAvail(dst);
                nIn = charrefLen;
                status = xmlEncOutputChunk(output->encoder, xmlBufEnd(dst),
                                           &nOut, charref, &nIn);

                if ((status < 0) || (nIn != charrefLen)) {
                    char dump[50];

                    snprintf(&dump[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                             content[0], content[1],
                             content[2], content[3]);
                    dump[49] = 0;
                    xmlEncodingErr(XML_I18N_CONV_FAILED,
                        "output conversion failed due to conv error, bytes %s\n",
                                   dump);
                    content[0] = ' ';
                    break;
                }

                xmlBufAddLen(dst, nOut);
                total += nOut;
                continue;
            }
        }
        break;
    }
    if (total != 0)
        return(total);
    return(status);
}
#endif
2486 * xmlCharEncOutFunc:
2487 * @handler: char encoding transformation data structure
2488 * @out: an xmlBuffer for the output.
2489 * @in: an xmlBuffer for the input
2491 * Generic front-end for the encoding handler output function
2492 * a first call with @in == NULL has to be made firs to initiate the
2493 * output in case of non-stateless encoding needing to initiate their
2494 * state or the output (like the BOM in UTF16).
2495 * In case of UTF8 sequence conversion errors for the given encoder,
2496 * the content will be automatically remapped to a CharRef sequence.
2498 * Returns the number of byte written if success, or
2499 * -1 general error
2500 * -2 if the transcoding fails (for *in is not valid utf8 string or
2501 * the result of transformation can't fit into the encoding we want), or
2504 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2505 xmlBufferPtr in) {
2506 int ret;
2507 int written;
2508 int writtentot = 0;
2509 int toconv;
2511 if (handler == NULL) return(-1);
2512 if (out == NULL) return(-1);
2514 retry:
2516 written = out->size - out->use;
2518 if (written > 0)
2519 written--; /* Gennady: count '/0' */
2522 * First specific handling of in = NULL, i.e. the initialization call
2524 if (in == NULL) {
2525 toconv = 0;
2526 /* TODO: Check return value. */
2527 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2528 NULL, &toconv);
2529 out->use += written;
2530 out->content[out->use] = 0;
2531 #ifdef DEBUG_ENCODING
2532 xmlGenericError(xmlGenericErrorContext,
2533 "initialized encoder\n");
2534 #endif
2535 return(0);
2539 * Conversion itself.
2541 toconv = in->use;
2542 if (toconv == 0)
2543 return(0);
2544 if (toconv * 4 >= written) {
2545 xmlBufferGrow(out, toconv * 4);
2546 written = out->size - out->use - 1;
2548 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2549 in->content, &toconv);
2550 xmlBufferShrink(in, toconv);
2551 out->use += written;
2552 writtentot += written;
2553 out->content[out->use] = 0;
2554 if (ret == -1) {
2555 if (written > 0) {
2556 /* Can be a limitation of iconv or uconv */
2557 goto retry;
2559 ret = -3;
2563 * Attempt to handle error cases
2565 switch (ret) {
2566 case 0:
2567 #ifdef DEBUG_ENCODING
2568 xmlGenericError(xmlGenericErrorContext,
2569 "converted %d bytes to %d bytes of output\n",
2570 toconv, written);
2571 #endif
2572 break;
2573 case -1:
2574 #ifdef DEBUG_ENCODING
2575 xmlGenericError(xmlGenericErrorContext,
2576 "output conversion failed by lack of space\n");
2577 #endif
2578 break;
2579 case -3:
2580 #ifdef DEBUG_ENCODING
2581 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2582 toconv, written, in->use);
2583 #endif
2584 break;
2585 case -4:
2586 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2587 "xmlCharEncOutFunc: no output function !\n", NULL);
2588 ret = -1;
2589 break;
2590 case -2: {
2591 xmlChar charref[20];
2592 int len = in->use;
2593 const xmlChar *utf = (const xmlChar *) in->content;
2594 int cur, charrefLen;
2596 cur = xmlGetUTF8Char(utf, &len);
2597 if (cur <= 0)
2598 break;
2600 #ifdef DEBUG_ENCODING
2601 xmlGenericError(xmlGenericErrorContext,
2602 "handling output conversion error\n");
2603 xmlGenericError(xmlGenericErrorContext,
2604 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2605 in->content[0], in->content[1],
2606 in->content[2], in->content[3]);
2607 #endif
2609 * Removes the UTF8 sequence, and replace it by a charref
2610 * and continue the transcoding phase, hoping the error
2611 * did not mangle the encoder state.
2613 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2614 "&#%d;", cur);
2615 xmlBufferShrink(in, len);
2616 xmlBufferGrow(out, charrefLen * 4);
2617 written = out->size - out->use - 1;
2618 toconv = charrefLen;
2619 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2620 charref, &toconv);
2622 if ((ret < 0) || (toconv != charrefLen)) {
2623 char buf[50];
2625 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2626 in->content[0], in->content[1],
2627 in->content[2], in->content[3]);
2628 buf[49] = 0;
2629 xmlEncodingErr(XML_I18N_CONV_FAILED,
2630 "output conversion failed due to conv error, bytes %s\n",
2631 buf);
2632 in->content[0] = ' ';
2633 break;
2636 out->use += written;
2637 writtentot += written;
2638 out->content[out->use] = 0;
2639 goto retry;
2642 return(writtentot ? writtentot : ret);
2646 * xmlCharEncCloseFunc:
2647 * @handler: char encoding transformation data structure
2649 * Generic front-end for encoding handler close function
2651 * Returns 0 if success, or -1 in case of error
2654 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2655 int ret = 0;
2656 int tofree = 0;
2657 int i = 0;
2659 if (handler == NULL) return(-1);
2661 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2662 if (handler == &defaultHandlers[i])
2663 return(0);
2666 if (handlers != NULL) {
2667 for (i = 0;i < nbCharEncodingHandler; i++) {
2668 if (handler == handlers[i])
2669 return(0);
2672 #ifdef LIBXML_ICONV_ENABLED
2674 * Iconv handlers can be used only once, free the whole block.
2675 * and the associated icon resources.
2677 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2678 tofree = 1;
2679 if (handler->iconv_out != NULL) {
2680 if (iconv_close(handler->iconv_out))
2681 ret = -1;
2682 handler->iconv_out = NULL;
2684 if (handler->iconv_in != NULL) {
2685 if (iconv_close(handler->iconv_in))
2686 ret = -1;
2687 handler->iconv_in = NULL;
2690 #endif /* LIBXML_ICONV_ENABLED */
2691 #ifdef LIBXML_ICU_ENABLED
2692 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2693 tofree = 1;
2694 if (handler->uconv_out != NULL) {
2695 closeIcuConverter(handler->uconv_out);
2696 handler->uconv_out = NULL;
2698 if (handler->uconv_in != NULL) {
2699 closeIcuConverter(handler->uconv_in);
2700 handler->uconv_in = NULL;
2703 #endif
2704 if (tofree) {
2705 /* free up only dynamic handlers iconv/uconv */
2706 if (handler->name != NULL)
2707 xmlFree(handler->name);
2708 handler->name = NULL;
2709 xmlFree(handler);
2711 #ifdef DEBUG_ENCODING
2712 if (ret)
2713 xmlGenericError(xmlGenericErrorContext,
2714 "failed to close the encoding handler\n");
2715 else
2716 xmlGenericError(xmlGenericErrorContext,
2717 "closed the encoding handler\n");
2718 #endif
2720 return(ret);
2724 * xmlByteConsumed:
2725 * @ctxt: an XML parser context
2727 * This function provides the current index of the parser relative
2728 * to the start of the current entity. This function is computed in
2729 * bytes from the beginning starting at zero and finishing at the
2730 * size in byte of the file if parsing a file. The function is
2731 * of constant cost if the input is UTF-8 but can be costly if run
2732 * on non-UTF-8 input.
2734 * Returns the index in bytes from the beginning of the entity or -1
2735 * in case the index could not be computed.
2737 long
2738 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2739 xmlParserInputPtr in;
2741 if (ctxt == NULL) return(-1);
2742 in = ctxt->input;
2743 if (in == NULL) return(-1);
2744 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2745 unsigned int unused = 0;
2746 xmlCharEncodingHandler * handler = in->buf->encoder;
2748 * Encoding conversion, compute the number of unused original
2749 * bytes from the input not consumed and subtract that from
2750 * the raw consumed value, this is not a cheap operation
2752 if (in->end - in->cur > 0) {
2753 unsigned char convbuf[32000];
2754 const unsigned char *cur = (const unsigned char *)in->cur;
2755 int toconv = in->end - in->cur, written = 32000;
2757 int ret;
2759 do {
2760 toconv = in->end - cur;
2761 written = 32000;
2762 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2763 cur, &toconv);
2764 if (ret < 0) {
2765 if (written > 0)
2766 ret = -2;
2767 else
2768 return(-1);
2770 unused += written;
2771 cur += toconv;
2772 } while (ret == -2);
2774 if (in->buf->rawconsumed < unused)
2775 return(-1);
2776 return(in->buf->rawconsumed - unused);
2778 return(in->consumed + (in->cur - in->base));
2781 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2782 #ifdef LIBXML_ISO8859X_ENABLED
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * @xlattable layout: bytes 0..31 select a 64-byte block from the low
 * five bits of a two-byte lead, bytes 32..47 select one from the low
 * four bits of a three-byte lead, and the blocks start at offset 48.
 * A looked-up value of 0 means the character is not in the target set.
 *
 * Conversion stops without error once @out is full, so no byte is ever
 * written past @out + *@outlen.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (invalid UTF-8 or character not in the target set), -3 if the
 * input ends inside a multi-byte sequence, or -1 if an argument is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting complete characters only.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              const unsigned char* const xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* Every character yields exactly one byte: stop when full. */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            status = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                status = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                status = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                status = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if ((inend - in) < 2) {
                /* trailing bytes not in input buffer */
                status = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                status = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                status = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                status = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            status = -2;
            goto done;
        }
        processed = in;
    }

done:
    /* Only fully converted characters count as consumed input. */
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    if (status < 0)
        return(status);
    return(*outlen);
}
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 * @unicodetable: 128 code points, indexed by (input byte - 0x80);
 *                an entry of 0 marks an undefined input byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.
 *
 * Bytes >= 0x80 are only converted while more than two bytes of output
 * space remain; the final steps copy at most two further ASCII bytes.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 * is NULL or an undefined input byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Compare remaining sizes rather than computing "outend - 2", which
     * would point before the buffer when *outlen is smaller than 2.
     */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Never copy more ASCII bytes than the output can still hold. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Check the bound first so that *in is never read at instop. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Use up to two leftover output bytes for plain ASCII input. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (*outlen);
}
2970 /************************************************************************
2971 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2972 ************************************************************************/
/*
 * 128 16-bit entries, one per byte value 0x80..0xFF (index = byte - 0x80).
 * ISO8859xToUTF8 in this file indexes its unicodetable argument this way
 * and treats an entry of 0 as an undefined code point.
 * NOTE(review): presumably the table passed to it for ISO-8859-2; the
 * call site is outside this view, confirm there.
 */
2974 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2975 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2976 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2977 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2978 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2979 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2980 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2981 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2982 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2983 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2984 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2985 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2986 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2987 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2988 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2989 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2990 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
/*
 * Two-level lookup table: 48 index bytes followed by 6 blocks of 64 bytes.
 * UTF8ToISO8859x in this file reads its xlattable argument with this
 * layout: bytes 0..31 pick a block from the low 5 bits of a two-byte
 * lead, bytes 32..47 from the low 4 bits of a three-byte lead, and a
 * block is indexed by the low 6 bits of a continuation byte.  A result
 * of 0 means "not in character set".
 * NOTE(review): presumably the xlattable used for ISO-8859-2; the call
 * site is outside this view, confirm there.
 */
2993 static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2994 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3002 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3003 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3004 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3005 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3006 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3009 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3011 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3014 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3015 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3016 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3017 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3018 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3019 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3020 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
/*
 * 128 16-bit entries, one per byte value 0x80..0xFF (index = byte - 0x80).
 * ISO8859xToUTF8 in this file indexes its unicodetable argument this way
 * and treats an entry of 0 as an undefined code point; this table holds
 * several such 0x0000 entries.
 * NOTE(review): presumably the table passed to it for ISO-8859-3; the
 * call site is outside this view, confirm there.
 */
3023 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3024 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3025 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3026 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3027 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3028 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3029 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3030 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3031 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3032 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3033 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3034 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3035 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3036 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3037 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3038 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3039 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
/*
 * Two-level lookup table: 48 index bytes followed by 7 blocks of 64 bytes.
 * UTF8ToISO8859x in this file reads its xlattable argument with this
 * layout: bytes 0..31 pick a block from the low 5 bits of a two-byte
 * lead, bytes 32..47 from the low 4 bits of a three-byte lead, and a
 * block is indexed by the low 6 bits of a continuation byte.  A result
 * of 0 means "not in character set".
 * NOTE(review): presumably the xlattable used for ISO-8859-3; the call
 * site is outside this view, confirm there.
 */
3042 static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3043 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3044 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3051 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3052 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3053 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3054 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3055 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3056 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3057 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3068 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3070 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3071 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3072 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3073 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
/*
 * 128 16-bit entries, one per byte value 0x80..0xFF (index = byte - 0x80).
 * ISO8859xToUTF8 in this file indexes its unicodetable argument this way
 * and treats an entry of 0 as an undefined code point.
 * NOTE(review): presumably the table passed to it for ISO-8859-4; the
 * call site is outside this view, confirm there.
 */
3076 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3077 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3078 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3079 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3080 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3081 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3082 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3083 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3084 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3085 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3086 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3087 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3088 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3089 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3090 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3091 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3092 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
/*
 * Two-level lookup table: 48 index bytes followed by 6 blocks of 64 bytes.
 * UTF8ToISO8859x in this file reads its xlattable argument with this
 * layout: bytes 0..31 pick a block from the low 5 bits of a two-byte
 * lead, bytes 32..47 from the low 4 bits of a three-byte lead, and a
 * block is indexed by the low 6 bits of a continuation byte.  A result
 * of 0 means "not in character set".
 * NOTE(review): presumably the xlattable used for ISO-8859-4; the call
 * site is outside this view, confirm there.
 */
3095 static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3096 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3097 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3100 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3102 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3104 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3105 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3106 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3107 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3108 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3109 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3110 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3112 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3113 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3114 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3115 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3116 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3117 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3120 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3121 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3122 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
/*
 * 128 16-bit entries, one per byte value 0x80..0xFF (index = byte - 0x80).
 * ISO8859xToUTF8 in this file indexes its unicodetable argument this way
 * and treats an entry of 0 as an undefined code point.
 * NOTE(review): presumably the table passed to it for ISO-8859-5; the
 * call site is outside this view, confirm there.
 */
3125 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3126 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3127 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3128 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3129 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3130 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3131 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3132 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3133 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3134 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3135 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3136 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3137 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3138 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3139 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3140 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3141 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
/*
 * Two-level lookup table: 48 index bytes followed by 6 blocks of 64 bytes.
 * UTF8ToISO8859x in this file reads its xlattable argument with this
 * layout: bytes 0..31 pick a block from the low 5 bits of a two-byte
 * lead, bytes 32..47 from the low 4 bits of a three-byte lead, and a
 * block is indexed by the low 6 bits of a continuation byte.  A result
 * of 0 means "not in character set".
 * NOTE(review): presumably the xlattable used for ISO-8859-5; the call
 * site is outside this view, confirm there.
 */
3144 static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3145 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3149 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3153 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3154 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3157 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3158 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3159 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3160 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3161 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3175 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3176 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3177 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3178 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3179 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3180 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3181 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3182 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3183 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3184 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3185 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3186 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3187 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3188 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3189 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3190 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3193 static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3194 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3199 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3200 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3202 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3203 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3211 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3212 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3213 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3214 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3219 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3220 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3221 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3222 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3223 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3224 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3225 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3226 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3227 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3228 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3229 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3230 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3231 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3232 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3233 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3234 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3235 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3238 static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3239 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3247 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3248 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3249 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3250 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3252 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3256 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3263 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3264 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3265 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3266 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3267 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3273 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3274 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3275 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3276 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3277 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3278 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3279 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3280 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3281 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3282 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3283 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3285 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3286 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3287 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3288 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3291 static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3292 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3300 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3301 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3302 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3303 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3311 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3316 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3321 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3326 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3327 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3328 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3329 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3330 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3331 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3332 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3333 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3334 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3335 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3336 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3337 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3338 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3339 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3340 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3341 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3344 static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3345 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3353 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3354 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3355 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3356 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3357 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3358 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3359 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3371 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3372 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3373 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3374 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3375 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3376 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3377 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3378 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3379 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3380 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3381 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3382 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3383 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3384 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3385 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3386 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3389 static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3390 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3394 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3398 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3399 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3400 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3402 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3404 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3405 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3408 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3409 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3418 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3419 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3420 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3423 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3424 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3425 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3426 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3427 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3428 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3429 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3430 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3431 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3432 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3433 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3434 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3435 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3436 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3437 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3438 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3439 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3442 static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3443 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3449 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3451 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3452 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3458 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3459 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3460 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3461 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3462 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3467 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3468 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3473 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3474 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3475 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3476 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3477 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3478 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3479 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3480 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3481 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3482 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3483 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3484 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3485 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3486 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3487 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3488 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3491 static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3492 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3498 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3499 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3500 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3501 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3502 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3503 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3504 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3505 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3512 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3513 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3514 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3515 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3516 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3517 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3518 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3519 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3520 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3521 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3522 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3525 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3526 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3527 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3528 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3529 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3530 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3531 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3532 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3533 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3534 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3535 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3536 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3537 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3538 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3539 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3540 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3541 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3544 static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3545 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3549 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3553 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3554 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3555 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3560 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3565 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3580 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3582 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3585 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3586 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3587 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3590 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3591 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3592 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3593 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3594 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3595 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3596 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3597 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3598 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3599 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3600 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3601 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3602 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3603 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3604 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3605 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3606 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3609 static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3610 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3618 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3619 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3620 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3621 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3633 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3634 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3635 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3636 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit Unicode code points, passed to ISO8859xToUTF8
 * by ISO8859_16ToUTF8. The first 32 entries are the identity range
 * 0x0080..0x009f, so entry i corresponds to byte value 0x80 + i.
 *
 * NOTE(review): the exact indexing is performed by ISO8859xToUTF8, which
 * is defined outside this block - confirm there before changing the layout.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table passed to UTF8ToISO8859x by UTF8ToISO8859_16. The initializer
 * supplies exactly 48 + 9 * 64 = 624 bytes, so the string literal's
 * terminating NUL is intentionally not stored.
 *
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by nine 64-byte blocks; the actual lookup scheme lives in UTF8ToISO8859x,
 * which is defined outside this block - confirm there.
 */
static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 */
3705 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3706 const unsigned char* in, int *inlen) {
3707 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3709 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3710 const unsigned char* in, int *inlen) {
3711 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3714 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3715 const unsigned char* in, int *inlen) {
3716 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3718 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3719 const unsigned char* in, int *inlen) {
3720 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3723 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3724 const unsigned char* in, int *inlen) {
3725 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3727 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3728 const unsigned char* in, int *inlen) {
3729 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3732 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3733 const unsigned char* in, int *inlen) {
3734 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3736 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3737 const unsigned char* in, int *inlen) {
3738 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3741 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3742 const unsigned char* in, int *inlen) {
3743 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3745 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3746 const unsigned char* in, int *inlen) {
3747 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3750 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3751 const unsigned char* in, int *inlen) {
3752 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3754 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3755 const unsigned char* in, int *inlen) {
3756 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3759 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3760 const unsigned char* in, int *inlen) {
3761 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3763 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3764 const unsigned char* in, int *inlen) {
3765 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3768 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3769 const unsigned char* in, int *inlen) {
3770 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3772 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3773 const unsigned char* in, int *inlen) {
3774 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3777 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3778 const unsigned char* in, int *inlen) {
3779 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3781 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3782 const unsigned char* in, int *inlen) {
3783 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3786 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3787 const unsigned char* in, int *inlen) {
3788 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3790 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3791 const unsigned char* in, int *inlen) {
3792 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3795 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3796 const unsigned char* in, int *inlen) {
3797 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3799 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3800 const unsigned char* in, int *inlen) {
3801 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3804 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3805 const unsigned char* in, int *inlen) {
3806 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3808 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3809 const unsigned char* in, int *inlen) {
3810 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3813 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3814 const unsigned char* in, int *inlen) {
3815 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3817 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3818 const unsigned char* in, int *inlen) {
3819 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3822 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3823 const unsigned char* in, int *inlen) {
3824 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3826 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3827 const unsigned char* in, int *inlen) {
3828 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3831 #endif
3832 #endif