dinput/tests: Support test IOCTLs on the bus control interface.
[wine.git] / libs / xml2 / encoding.c
blobc14c9ff699108a0bd631d4c7ef3f5a913dd73eea
/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
23 #define IN_LIBXML
24 #include "libxml.h"
26 #include <string.h>
27 #include <limits.h>
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
48 #include "buf.h"
49 #include "enc.h"
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56 struct _xmlCharEncodingAlias {
57 const char *name;
58 const char *alias;
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62 static int xmlCharEncodingAliasesNb = 0;
63 static int xmlCharEncodingAliasesMax = 0;
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66 #if 0
67 #define DEBUG_ENCODING /* Define this to get encoding traces */
68 #endif
69 #else
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
72 #endif
73 #endif
75 static int xmlLittleEndian = 1;
77 /**
78 * xmlEncodingErrMemory:
79 * @extra: extra information
81 * Handle an out of memory condition
83 static void
84 xmlEncodingErrMemory(const char *extra)
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
89 /**
90 * xmlErrEncoding:
91 * @error: the error number
92 * @msg: the error message
94 * n encoding error
96 static void LIBXML_ATTR_FORMAT(2,0)
97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
106 openIcuConverter(const char* name, int toUnicode)
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110 if (conv == NULL)
111 return NULL;
113 conv->pivot_source = conv->pivot_buf;
114 conv->pivot_target = conv->pivot_buf;
116 conv->uconv = ucnv_open(name, &status);
117 if (U_FAILURE(status))
118 goto error;
120 status = U_ZERO_ERROR;
121 if (toUnicode) {
122 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123 NULL, NULL, NULL, &status);
125 else {
126 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127 NULL, NULL, NULL, &status);
129 if (U_FAILURE(status))
130 goto error;
132 status = U_ZERO_ERROR;
133 conv->utf8 = ucnv_open("UTF-8", &status);
134 if (U_SUCCESS(status))
135 return conv;
137 error:
138 if (conv->uconv)
139 ucnv_close(conv->uconv);
140 xmlFree(conv);
141 return NULL;
144 static void
145 closeIcuConverter(uconv_t *conv)
147 if (conv != NULL) {
148 ucnv_close(conv->uconv);
149 ucnv_close(conv->utf8);
150 xmlFree(conv);
153 #endif /* LIBXML_ICU_ENABLED */
/************************************************************************
 *									*
 *		Conversions To/From UTF8 encoding			*
 *									*
 ************************************************************************/
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.  Every ASCII byte maps to exactly one output byte,
 * so the whole of @out may be used; conversion stops when either buffer
 * is exhausted.
 *
 * Returns the number of bytes written if success, or -1 if an argument is
 *     NULL or a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed
 *     (up to, but not including, the offending byte on error).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while ((in < inend) && (out < outend)) {
        unsigned int c = *in;

        if (c >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = (unsigned char) c;
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
206 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.  A NULL @in is the "initialization" call and
 * produces nothing.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     always a whole number of input characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;   /* no chance for this in Ascii */

        /* sequence split across buffers: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte here is malformed input */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c >= 0x80)
            goto invalid;                   /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
290 #endif /* LIBXML_OUTPUT_ENABLED */
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied, bytes from 0x80 up
 * become a two-byte sequence; conversion stops at the first character
 * that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        if (*in < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = *in++;
        } else {
            /* two output bytes needed; never emit half a sequence */
            if (outend - out < 2)
                break;
            *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit in
 * both buffers.  A NULL @inb is the "initialization" call and produces
 * nothing.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the smaller of the two lengths is negative.
 * The value of *inlenb and *outlen after return is the number of octets
 *     copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int len;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy the smaller of the two lengths */
    len = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (len < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, len);

    *outlen = len;
    *inlenb = len;
    return(*outlen);
}
389 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.  A NULL @in is the "initialization" call and
 * produces nothing.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     always a whole number of input characters.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;   /* no chance for this in IsoLat1 */

        /* sequence split across buffers: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c > 0xFF)
            goto invalid;                   /* no chance for this in IsoLat1 */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
479 #endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out.  The input is read byte by byte, so it needs no
 * particular alignment and the result does not depend on the byte order
 * of the host.  A trailing odd byte and a high surrogate whose partner has
 * not arrived yet are left unconsumed.  A character is only written when
 * its whole UTF-8 sequence fits in @out.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned char lead;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend)             /* handle split multi-byte characters */
                break;
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        if      (c <    0x80) { lead= c;                          bits= -6; }
        else if (c <   0x800) { lead= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) { lead= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  { lead= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* bits / 6 + 2 is the full sequence length (1 to 4 bytes) */
        if (outend - out < bits / 6 + 2)
            break;

        *out++ = lead;
        for ( ; bits >= 0; bits-= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
574 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  No byte order mark is produced.  The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.  A NULL @in is
 * the "initialization" call and produces nothing.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8).
 * The value of *inlen after return is the number of octets consumed,
 *     always a whole number of input characters.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;   /* no chance for this in UTF-16 */

        /* sequence split across buffers: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte here is malformed input */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        }
        else
            break;
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
685 * UTF8ToUTF16:
686 * @outb: a pointer to an array of bytes to store the result
687 * @outlen: the length of @outb
688 * @in: a pointer to an array of UTF-8 chars
689 * @inlen: the length of @in
691 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
692 * block of chars out.
694 * Returns the number of bytes written, or -1 if lack of space, or -2
695 * if the transcoding failed.
697 static int
698 UTF8ToUTF16(unsigned char* outb, int *outlen,
699 const unsigned char* in, int *inlen)
701 if (in == NULL) {
703 * initialization, add the Byte Order Mark for UTF-16LE
705 if (*outlen >= 2) {
706 outb[0] = 0xFF;
707 outb[1] = 0xFE;
708 *outlen = 2;
709 *inlen = 0;
710 #ifdef DEBUG_ENCODING
711 xmlGenericError(xmlGenericErrorContext,
712 "Added FFFE Byte Order Mark\n");
713 #endif
714 return(2);
716 *outlen = 0;
717 *inlen = 0;
718 return(0);
720 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
722 #endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out.  The input is read byte by byte, so it needs no
 * particular alignment and the result does not depend on the byte order
 * of the host.  A trailing odd byte and a high surrogate whose partner has
 * not arrived yet are left unconsumed.  A character is only written when
 * its whole UTF-8 sequence fits in @out.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned char lead;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend)             /* handle split multi-byte characters */
                break;
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UTF-4 value */
        if      (c <    0x80) { lead= c;                          bits= -6; }
        else if (c <   0x800) { lead= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) { lead= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  { lead= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        /* bits / 6 + 2 is the full sequence length (1 to 4 bytes) */
        if (outend - out < bits / 6 + 2)
            break;

        *out++ = lead;
        for ( ; bits >= 0; bits-= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
817 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  No byte order mark is produced.  The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host.  A NULL @in is
 * the "initialization" call and produces nothing.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8).
 * The value of *inlen after return is the number of octets consumed,
 *     always a whole number of input characters.
 * The value of *outlen after return is the number of octets produced,
 *     on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend= in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto invalid;   /* no chance for this in UTF-16 */

        /* sequence split across buffers: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte here is malformed input */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        }
        else if (c < 0x110000) {
            unsigned int hi, lo;

            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        }
        else
            break;
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* byte count, matching the success path and UTF8ToUTF16LE() */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
923 #endif /* LIBXML_OUTPUT_ENABLED */
/************************************************************************
 *									*
 *		Generic encoding handling routines			*
 *									*
 ************************************************************************/
932 * xmlDetectCharEncoding:
933 * @in: a pointer to the first bytes of the XML entity, must be at least
934 * 2 bytes long (at least 4 if encoding is UTF4 variant).
935 * @len: pointer to the length of the buffer
937 * Guess the encoding of the entity using the first bytes of the entity content
938 * according to the non-normative appendix F of the XML-1.0 recommendation.
940 * Returns one of the XML_CHAR_ENCODING_... values.
942 xmlCharEncoding
943 xmlDetectCharEncoding(const unsigned char* in, int len)
945 if (in == NULL)
946 return(XML_CHAR_ENCODING_NONE);
947 if (len >= 4) {
948 if ((in[0] == 0x00) && (in[1] == 0x00) &&
949 (in[2] == 0x00) && (in[3] == 0x3C))
950 return(XML_CHAR_ENCODING_UCS4BE);
951 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 (in[2] == 0x00) && (in[3] == 0x00))
953 return(XML_CHAR_ENCODING_UCS4LE);
954 if ((in[0] == 0x00) && (in[1] == 0x00) &&
955 (in[2] == 0x3C) && (in[3] == 0x00))
956 return(XML_CHAR_ENCODING_UCS4_2143);
957 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
958 (in[2] == 0x00) && (in[3] == 0x00))
959 return(XML_CHAR_ENCODING_UCS4_3412);
960 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
961 (in[2] == 0xA7) && (in[3] == 0x94))
962 return(XML_CHAR_ENCODING_EBCDIC);
963 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
964 (in[2] == 0x78) && (in[3] == 0x6D))
965 return(XML_CHAR_ENCODING_UTF8);
967 * Although not part of the recommendation, we also
968 * attempt an "auto-recognition" of UTF-16LE and
969 * UTF-16BE encodings.
971 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
972 (in[2] == 0x3F) && (in[3] == 0x00))
973 return(XML_CHAR_ENCODING_UTF16LE);
974 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
975 (in[2] == 0x00) && (in[3] == 0x3F))
976 return(XML_CHAR_ENCODING_UTF16BE);
978 if (len >= 3) {
980 * Errata on XML-1.0 June 20 2001
981 * We now allow an UTF8 encoded BOM
983 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
984 (in[2] == 0xBF))
985 return(XML_CHAR_ENCODING_UTF8);
987 /* For UTF-16 we can recognize by the BOM */
988 if (len >= 2) {
989 if ((in[0] == 0xFE) && (in[1] == 0xFF))
990 return(XML_CHAR_ENCODING_UTF16BE);
991 if ((in[0] == 0xFF) && (in[1] == 0xFE))
992 return(XML_CHAR_ENCODING_UTF16LE);
994 return(XML_CHAR_ENCODING_NONE);
998 * xmlCleanupEncodingAliases:
1000 * Unregisters all aliases
1002 void
1003 xmlCleanupEncodingAliases(void) {
1004 int i;
1006 if (xmlCharEncodingAliases == NULL)
1007 return;
1009 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1010 if (xmlCharEncodingAliases[i].name != NULL)
1011 xmlFree((char *) xmlCharEncodingAliases[i].name);
1012 if (xmlCharEncodingAliases[i].alias != NULL)
1013 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1015 xmlCharEncodingAliasesNb = 0;
1016 xmlCharEncodingAliasesMax = 0;
1017 xmlFree(xmlCharEncodingAliases);
1018 xmlCharEncodingAliases = NULL;
1022 * xmlGetEncodingAlias:
1023 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1025 * Lookup an encoding name for the given alias.
1027 * Returns NULL if not found, otherwise the original name
1029 const char *
1030 xmlGetEncodingAlias(const char *alias) {
1031 int i;
1032 char upper[100];
1034 if (alias == NULL)
1035 return(NULL);
1037 if (xmlCharEncodingAliases == NULL)
1038 return(NULL);
1040 for (i = 0;i < 99;i++) {
1041 upper[i] = toupper(alias[i]);
1042 if (upper[i] == 0) break;
1044 upper[i] = 0;
1047 * Walk down the list looking for a definition of the alias
1049 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1050 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1051 return(xmlCharEncodingAliases[i].name);
1054 return(NULL);
1058 * xmlAddEncodingAlias:
1059 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1060 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1062 * Registers an alias @alias for an encoding named @name. Existing alias
1063 * will be overwritten.
1065 * Returns 0 in case of success, -1 in case of error
1068 xmlAddEncodingAlias(const char *name, const char *alias) {
1069 int i;
1070 char upper[100];
1072 if ((name == NULL) || (alias == NULL))
1073 return(-1);
1075 for (i = 0;i < 99;i++) {
1076 upper[i] = toupper(alias[i]);
1077 if (upper[i] == 0) break;
1079 upper[i] = 0;
1081 if (xmlCharEncodingAliases == NULL) {
1082 xmlCharEncodingAliasesNb = 0;
1083 xmlCharEncodingAliasesMax = 20;
1084 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1085 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1086 if (xmlCharEncodingAliases == NULL)
1087 return(-1);
1088 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1089 xmlCharEncodingAliasesMax *= 2;
1090 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1091 xmlRealloc(xmlCharEncodingAliases,
1092 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1095 * Walk down the list looking for a definition of the alias
1097 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1098 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1100 * Replace the definition.
1102 xmlFree((char *) xmlCharEncodingAliases[i].name);
1103 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1104 return(0);
1108 * Add the definition
1110 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1111 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1112 xmlCharEncodingAliasesNb++;
1113 return(0);
1117 * xmlDelEncodingAlias:
1118 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1120 * Unregisters an encoding alias @alias
1122 * Returns 0 in case of success, -1 in case of error
1125 xmlDelEncodingAlias(const char *alias) {
1126 int i;
1128 if (alias == NULL)
1129 return(-1);
1131 if (xmlCharEncodingAliases == NULL)
1132 return(-1);
1134 * Walk down the list looking for a definition of the alias
1136 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1137 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1138 xmlFree((char *) xmlCharEncodingAliases[i].name);
1139 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1140 xmlCharEncodingAliasesNb--;
1141 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1142 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1143 return(0);
1146 return(-1);
1150 * xmlParseCharEncoding:
1151 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1153 * Compare the string to the encoding schemes already known. Note
1154 * that the comparison is case insensitive accordingly to the section
1155 * [XML] 4.3.3 Character Encoding in Entities.
1157 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1158 * if not recognized.
1160 xmlCharEncoding
1161 xmlParseCharEncoding(const char* name)
1163 const char *alias;
1164 char upper[500];
1165 int i;
1167 if (name == NULL)
1168 return(XML_CHAR_ENCODING_NONE);
1171 * Do the alias resolution
1173 alias = xmlGetEncodingAlias(name);
1174 if (alias != NULL)
1175 name = alias;
1177 for (i = 0;i < 499;i++) {
1178 upper[i] = toupper(name[i]);
1179 if (upper[i] == 0) break;
1181 upper[i] = 0;
1183 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1184 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1185 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1188 * NOTE: if we were able to parse this, the endianness of UTF16 is
1189 * already found and in use
1191 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1192 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1194 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1196 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1199 * NOTE: if we were able to parse this, the endianness of UCS4 is
1200 * already found and in use
1202 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1204 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1207 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1208 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1209 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1211 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1212 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1213 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1215 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1216 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1217 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1218 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1219 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1220 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1221 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1223 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1224 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1225 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1227 #ifdef DEBUG_ENCODING
1228 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1229 #endif
1230 return(XML_CHAR_ENCODING_ERROR);
1234 * xmlGetCharEncodingName:
1235 * @enc: the encoding
1237 * The "canonical" name for XML encoding.
1238 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1239 * Section 4.3.3 Character Encoding in Entities
1241 * Returns the canonical name for the given encoding
1244 const char*
1245 xmlGetCharEncodingName(xmlCharEncoding enc) {
1246 switch (enc) {
1247 case XML_CHAR_ENCODING_ERROR:
1248 return(NULL);
1249 case XML_CHAR_ENCODING_NONE:
1250 return(NULL);
1251 case XML_CHAR_ENCODING_UTF8:
1252 return("UTF-8");
1253 case XML_CHAR_ENCODING_UTF16LE:
1254 return("UTF-16");
1255 case XML_CHAR_ENCODING_UTF16BE:
1256 return("UTF-16");
1257 case XML_CHAR_ENCODING_EBCDIC:
1258 return("EBCDIC");
1259 case XML_CHAR_ENCODING_UCS4LE:
1260 return("ISO-10646-UCS-4");
1261 case XML_CHAR_ENCODING_UCS4BE:
1262 return("ISO-10646-UCS-4");
1263 case XML_CHAR_ENCODING_UCS4_2143:
1264 return("ISO-10646-UCS-4");
1265 case XML_CHAR_ENCODING_UCS4_3412:
1266 return("ISO-10646-UCS-4");
1267 case XML_CHAR_ENCODING_UCS2:
1268 return("ISO-10646-UCS-2");
1269 case XML_CHAR_ENCODING_8859_1:
1270 return("ISO-8859-1");
1271 case XML_CHAR_ENCODING_8859_2:
1272 return("ISO-8859-2");
1273 case XML_CHAR_ENCODING_8859_3:
1274 return("ISO-8859-3");
1275 case XML_CHAR_ENCODING_8859_4:
1276 return("ISO-8859-4");
1277 case XML_CHAR_ENCODING_8859_5:
1278 return("ISO-8859-5");
1279 case XML_CHAR_ENCODING_8859_6:
1280 return("ISO-8859-6");
1281 case XML_CHAR_ENCODING_8859_7:
1282 return("ISO-8859-7");
1283 case XML_CHAR_ENCODING_8859_8:
1284 return("ISO-8859-8");
1285 case XML_CHAR_ENCODING_8859_9:
1286 return("ISO-8859-9");
1287 case XML_CHAR_ENCODING_2022_JP:
1288 return("ISO-2022-JP");
1289 case XML_CHAR_ENCODING_SHIFT_JIS:
1290 return("Shift-JIS");
1291 case XML_CHAR_ENCODING_EUC_JP:
1292 return("EUC-JP");
1293 case XML_CHAR_ENCODING_ASCII:
1294 return(NULL);
1296 return(NULL);
1299 /************************************************************************
1301 * Char encoding handlers *
1303 ************************************************************************/
1306 /* the size should be growable, but it's not a big deal ... */
1307 #define MAX_ENCODING_HANDLERS 50
1308 static xmlCharEncodingHandlerPtr *handlers = NULL;
1309 static int nbCharEncodingHandler = 0;
1312 * The default is UTF-8 for XML, that's also the default used for the
1313 * parser internals, so the default encoding handler is NULL
1316 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1319 * xmlNewCharEncodingHandler:
1320 * @name: the encoding name, in UTF-8 format (ASCII actually)
1321 * @input: the xmlCharEncodingInputFunc to read that encoding
1322 * @output: the xmlCharEncodingOutputFunc to write that encoding
1324 * Create and registers an xmlCharEncodingHandler.
1326 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1328 xmlCharEncodingHandlerPtr
1329 xmlNewCharEncodingHandler(const char *name,
1330 xmlCharEncodingInputFunc input,
1331 xmlCharEncodingOutputFunc output) {
1332 xmlCharEncodingHandlerPtr handler;
1333 const char *alias;
1334 char upper[500];
1335 int i;
1336 char *up = NULL;
1339 * Do the alias resolution
1341 alias = xmlGetEncodingAlias(name);
1342 if (alias != NULL)
1343 name = alias;
1346 * Keep only the uppercase version of the encoding.
1348 if (name == NULL) {
1349 xmlEncodingErr(XML_I18N_NO_NAME,
1350 "xmlNewCharEncodingHandler : no name !\n", NULL);
1351 return(NULL);
1353 for (i = 0;i < 499;i++) {
1354 upper[i] = toupper(name[i]);
1355 if (upper[i] == 0) break;
1357 upper[i] = 0;
1358 up = xmlMemStrdup(upper);
1359 if (up == NULL) {
1360 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361 return(NULL);
1365 * allocate and fill-up an handler block.
1367 handler = (xmlCharEncodingHandlerPtr)
1368 xmlMalloc(sizeof(xmlCharEncodingHandler));
1369 if (handler == NULL) {
1370 xmlFree(up);
1371 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1372 return(NULL);
1374 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1375 handler->input = input;
1376 handler->output = output;
1377 handler->name = up;
1379 #ifdef LIBXML_ICONV_ENABLED
1380 handler->iconv_in = NULL;
1381 handler->iconv_out = NULL;
1382 #endif
1383 #ifdef LIBXML_ICU_ENABLED
1384 handler->uconv_in = NULL;
1385 handler->uconv_out = NULL;
1386 #endif
1389 * registers and returns the handler.
1391 xmlRegisterCharEncodingHandler(handler);
1392 #ifdef DEBUG_ENCODING
1393 xmlGenericError(xmlGenericErrorContext,
1394 "Registered encoding handler for %s\n", name);
1395 #endif
1396 return(handler);
1400 * xmlInitCharEncodingHandlers:
1402 * Initialize the char encoding support, it registers the default
1403 * encoding supported.
1404 * NOTE: while public, this function usually doesn't need to be called
1405 * in normal processing.
1407 void
1408 xmlInitCharEncodingHandlers(void) {
1409 unsigned short int tst = 0x1234;
1410 unsigned char *ptr = (unsigned char *) &tst;
1412 if (handlers != NULL) return;
1414 handlers = (xmlCharEncodingHandlerPtr *)
1415 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1417 if (*ptr == 0x12) xmlLittleEndian = 0;
1418 else if (*ptr == 0x34) xmlLittleEndian = 1;
1419 else {
1420 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1421 "Odd problem at endianness detection\n", NULL);
1424 if (handlers == NULL) {
1425 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1426 return;
1428 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1429 #ifdef LIBXML_OUTPUT_ENABLED
1430 xmlUTF16LEHandler =
1431 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1432 xmlUTF16BEHandler =
1433 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1434 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1435 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1436 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1437 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1438 #ifdef LIBXML_HTML_ENABLED
1439 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1440 #endif
1441 #else
1442 xmlUTF16LEHandler =
1443 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1444 xmlUTF16BEHandler =
1445 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1446 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1447 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1448 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1449 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1450 #endif /* LIBXML_OUTPUT_ENABLED */
1451 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1452 #ifdef LIBXML_ISO8859X_ENABLED
1453 xmlRegisterCharEncodingHandlersISO8859x ();
1454 #endif
1455 #endif
1460 * xmlCleanupCharEncodingHandlers:
1462 * Cleanup the memory allocated for the char encoding support, it
1463 * unregisters all the encoding handlers and the aliases.
1465 void
1466 xmlCleanupCharEncodingHandlers(void) {
1467 xmlCleanupEncodingAliases();
1469 if (handlers == NULL) return;
1471 for (;nbCharEncodingHandler > 0;) {
1472 nbCharEncodingHandler--;
1473 if (handlers[nbCharEncodingHandler] != NULL) {
1474 if (handlers[nbCharEncodingHandler]->name != NULL)
1475 xmlFree(handlers[nbCharEncodingHandler]->name);
1476 xmlFree(handlers[nbCharEncodingHandler]);
1479 xmlFree(handlers);
1480 handlers = NULL;
1481 nbCharEncodingHandler = 0;
1482 xmlDefaultCharEncodingHandler = NULL;
1486 * xmlRegisterCharEncodingHandler:
1487 * @handler: the xmlCharEncodingHandlerPtr handler block
1489 * Register the char encoding handler, surprising, isn't it ?
1491 void
1492 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1493 if (handlers == NULL) xmlInitCharEncodingHandlers();
1494 if ((handler == NULL) || (handlers == NULL)) {
1495 xmlEncodingErr(XML_I18N_NO_HANDLER,
1496 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1497 goto free_handler;
1500 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1501 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1502 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1503 "MAX_ENCODING_HANDLERS");
1504 goto free_handler;
1506 handlers[nbCharEncodingHandler++] = handler;
1507 return;
1509 free_handler:
1510 if (handler != NULL) {
1511 if (handler->name != NULL) {
1512 xmlFree(handler->name);
1514 xmlFree(handler);
1519 * xmlGetCharEncodingHandler:
1520 * @enc: an xmlCharEncoding value.
1522 * Search in the registered set the handler able to read/write that encoding.
1524 * Returns the handler or NULL if not found
1526 xmlCharEncodingHandlerPtr
1527 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1528 xmlCharEncodingHandlerPtr handler;
1530 if (handlers == NULL) xmlInitCharEncodingHandlers();
1531 switch (enc) {
1532 case XML_CHAR_ENCODING_ERROR:
1533 return(NULL);
1534 case XML_CHAR_ENCODING_NONE:
1535 return(NULL);
1536 case XML_CHAR_ENCODING_UTF8:
1537 return(NULL);
1538 case XML_CHAR_ENCODING_UTF16LE:
1539 return(xmlUTF16LEHandler);
1540 case XML_CHAR_ENCODING_UTF16BE:
1541 return(xmlUTF16BEHandler);
1542 case XML_CHAR_ENCODING_EBCDIC:
1543 handler = xmlFindCharEncodingHandler("EBCDIC");
1544 if (handler != NULL) return(handler);
1545 handler = xmlFindCharEncodingHandler("ebcdic");
1546 if (handler != NULL) return(handler);
1547 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1548 if (handler != NULL) return(handler);
1549 handler = xmlFindCharEncodingHandler("IBM-037");
1550 if (handler != NULL) return(handler);
1551 break;
1552 case XML_CHAR_ENCODING_UCS4BE:
1553 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1554 if (handler != NULL) return(handler);
1555 handler = xmlFindCharEncodingHandler("UCS-4");
1556 if (handler != NULL) return(handler);
1557 handler = xmlFindCharEncodingHandler("UCS4");
1558 if (handler != NULL) return(handler);
1559 break;
1560 case XML_CHAR_ENCODING_UCS4LE:
1561 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1562 if (handler != NULL) return(handler);
1563 handler = xmlFindCharEncodingHandler("UCS-4");
1564 if (handler != NULL) return(handler);
1565 handler = xmlFindCharEncodingHandler("UCS4");
1566 if (handler != NULL) return(handler);
1567 break;
1568 case XML_CHAR_ENCODING_UCS4_2143:
1569 break;
1570 case XML_CHAR_ENCODING_UCS4_3412:
1571 break;
1572 case XML_CHAR_ENCODING_UCS2:
1573 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1574 if (handler != NULL) return(handler);
1575 handler = xmlFindCharEncodingHandler("UCS-2");
1576 if (handler != NULL) return(handler);
1577 handler = xmlFindCharEncodingHandler("UCS2");
1578 if (handler != NULL) return(handler);
1579 break;
1582 * We used to keep ISO Latin encodings native in the
1583 * generated data. This led to so many problems that
1584 * this has been removed. One can still change this
1585 * back by registering no-ops encoders for those
1587 case XML_CHAR_ENCODING_8859_1:
1588 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1589 if (handler != NULL) return(handler);
1590 break;
1591 case XML_CHAR_ENCODING_8859_2:
1592 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1593 if (handler != NULL) return(handler);
1594 break;
1595 case XML_CHAR_ENCODING_8859_3:
1596 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1597 if (handler != NULL) return(handler);
1598 break;
1599 case XML_CHAR_ENCODING_8859_4:
1600 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1601 if (handler != NULL) return(handler);
1602 break;
1603 case XML_CHAR_ENCODING_8859_5:
1604 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1605 if (handler != NULL) return(handler);
1606 break;
1607 case XML_CHAR_ENCODING_8859_6:
1608 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1609 if (handler != NULL) return(handler);
1610 break;
1611 case XML_CHAR_ENCODING_8859_7:
1612 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1613 if (handler != NULL) return(handler);
1614 break;
1615 case XML_CHAR_ENCODING_8859_8:
1616 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1617 if (handler != NULL) return(handler);
1618 break;
1619 case XML_CHAR_ENCODING_8859_9:
1620 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1621 if (handler != NULL) return(handler);
1622 break;
1625 case XML_CHAR_ENCODING_2022_JP:
1626 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1627 if (handler != NULL) return(handler);
1628 break;
1629 case XML_CHAR_ENCODING_SHIFT_JIS:
1630 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1631 if (handler != NULL) return(handler);
1632 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1633 if (handler != NULL) return(handler);
1634 handler = xmlFindCharEncodingHandler("Shift_JIS");
1635 if (handler != NULL) return(handler);
1636 break;
1637 case XML_CHAR_ENCODING_EUC_JP:
1638 handler = xmlFindCharEncodingHandler("EUC-JP");
1639 if (handler != NULL) return(handler);
1640 break;
1641 default:
1642 break;
1645 #ifdef DEBUG_ENCODING
1646 xmlGenericError(xmlGenericErrorContext,
1647 "No handler found for encoding %d\n", enc);
1648 #endif
1649 return(NULL);
1653 * xmlFindCharEncodingHandler:
1654 * @name: a string describing the char encoding.
1656 * Search in the registered set the handler able to read/write that encoding.
1658 * Returns the handler or NULL if not found
1660 xmlCharEncodingHandlerPtr
1661 xmlFindCharEncodingHandler(const char *name) {
1662 const char *nalias;
1663 const char *norig;
1664 xmlCharEncoding alias;
1665 #ifdef LIBXML_ICONV_ENABLED
1666 xmlCharEncodingHandlerPtr enc;
1667 iconv_t icv_in, icv_out;
1668 #endif /* LIBXML_ICONV_ENABLED */
1669 #ifdef LIBXML_ICU_ENABLED
1670 xmlCharEncodingHandlerPtr encu;
1671 uconv_t *ucv_in, *ucv_out;
1672 #endif /* LIBXML_ICU_ENABLED */
1673 char upper[100];
1674 int i;
1676 if (handlers == NULL) xmlInitCharEncodingHandlers();
1677 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1678 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1681 * Do the alias resolution
1683 norig = name;
1684 nalias = xmlGetEncodingAlias(name);
1685 if (nalias != NULL)
1686 name = nalias;
1689 * Check first for directly registered encoding names
1691 for (i = 0;i < 99;i++) {
1692 upper[i] = toupper(name[i]);
1693 if (upper[i] == 0) break;
1695 upper[i] = 0;
1697 if (handlers != NULL) {
1698 for (i = 0;i < nbCharEncodingHandler; i++) {
1699 if (!strcmp(upper, handlers[i]->name)) {
1700 #ifdef DEBUG_ENCODING
1701 xmlGenericError(xmlGenericErrorContext,
1702 "Found registered handler for encoding %s\n", name);
1703 #endif
1704 return(handlers[i]);
1709 #ifdef LIBXML_ICONV_ENABLED
1710 /* check whether iconv can handle this */
1711 icv_in = iconv_open("UTF-8", name);
1712 icv_out = iconv_open(name, "UTF-8");
1713 if (icv_in == (iconv_t) -1) {
1714 icv_in = iconv_open("UTF-8", upper);
1716 if (icv_out == (iconv_t) -1) {
1717 icv_out = iconv_open(upper, "UTF-8");
1719 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1720 enc = (xmlCharEncodingHandlerPtr)
1721 xmlMalloc(sizeof(xmlCharEncodingHandler));
1722 if (enc == NULL) {
1723 iconv_close(icv_in);
1724 iconv_close(icv_out);
1725 return(NULL);
1727 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1728 enc->name = xmlMemStrdup(name);
1729 enc->input = NULL;
1730 enc->output = NULL;
1731 enc->iconv_in = icv_in;
1732 enc->iconv_out = icv_out;
1733 #ifdef DEBUG_ENCODING
1734 xmlGenericError(xmlGenericErrorContext,
1735 "Found iconv handler for encoding %s\n", name);
1736 #endif
1737 return enc;
1738 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1739 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1740 "iconv : problems with filters for '%s'\n", name);
1741 if (icv_in != (iconv_t) -1)
1742 iconv_close(icv_in);
1743 else
1744 iconv_close(icv_out);
1746 #endif /* LIBXML_ICONV_ENABLED */
1747 #ifdef LIBXML_ICU_ENABLED
1748 /* check whether icu can handle this */
1749 ucv_in = openIcuConverter(name, 1);
1750 ucv_out = openIcuConverter(name, 0);
1751 if (ucv_in != NULL && ucv_out != NULL) {
1752 encu = (xmlCharEncodingHandlerPtr)
1753 xmlMalloc(sizeof(xmlCharEncodingHandler));
1754 if (encu == NULL) {
1755 closeIcuConverter(ucv_in);
1756 closeIcuConverter(ucv_out);
1757 return(NULL);
1759 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1760 encu->name = xmlMemStrdup(name);
1761 encu->input = NULL;
1762 encu->output = NULL;
1763 encu->uconv_in = ucv_in;
1764 encu->uconv_out = ucv_out;
1765 #ifdef DEBUG_ENCODING
1766 xmlGenericError(xmlGenericErrorContext,
1767 "Found ICU converter handler for encoding %s\n", name);
1768 #endif
1769 return encu;
1770 } else if (ucv_in != NULL || ucv_out != NULL) {
1771 closeIcuConverter(ucv_in);
1772 closeIcuConverter(ucv_out);
1773 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1774 "ICU converter : problems with filters for '%s'\n", name);
1776 #endif /* LIBXML_ICU_ENABLED */
1778 #ifdef DEBUG_ENCODING
1779 xmlGenericError(xmlGenericErrorContext,
1780 "No handler found for encoding %s\n", name);
1781 #endif
1784 * Fallback using the canonical names
1786 alias = xmlParseCharEncoding(norig);
1787 if (alias != XML_CHAR_ENCODING_ERROR) {
1788 const char* canon;
1789 canon = xmlGetCharEncodingName(alias);
1790 if ((canon != NULL) && (strcmp(name, canon))) {
1791 return(xmlFindCharEncodingHandler(canon));
1795 /* If "none of the above", give up */
1796 return(NULL);
/************************************************************************
 *									*
 *		ICONV based generic conversion functions		*
 *									*
 ************************************************************************/

#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs one iconv() call over the given buffers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * After the conversion call, @inlen holds the number of octets consumed
 * and @outlen the number of octets produced. When an argument is NULL,
 * -1 is returned and @outlen (if not NULL) is set to 0.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if (outlen == NULL)
        return(-1);
    if ((out == NULL) || (inlen == NULL) || (in == NULL)) {
        *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);
    /* turn "bytes left" into "bytes consumed" / "bytes produced" */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != (size_t) -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL (truncated sequence) and any other error both map to -3 */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
/************************************************************************
 *									*
 *		ICU based generic conversion functions		*
 *									*
 ************************************************************************/

#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd: ICU uconverter data structure
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 * @flush: if true, indicates end of input
 *
 * Runs one ucnv_convertEx() call over the given buffers, going through
 * the pivot buffer kept in @cd.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * After the conversion call, @inlen holds the number of octets consumed
 * and @outlen the number of octets produced. When an argument is NULL,
 * -1 is returned and @outlen (if not NULL) is set to 0.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen, int flush) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    UErrorCode status = U_ZERO_ERROR;

    if (outlen == NULL)
        return(-1);
    if ((out == NULL) || (inlen == NULL) || (in == NULL)) {
        *outlen = 0;
        return(-1);
    }

    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        ucnv_convertEx(cd->utf8, cd->uconv, &dst, dst + *outlen,
                       &src, src + *inlen, cd->pivot_buf,
                       &cd->pivot_source, &cd->pivot_target,
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &status);
    } else {
        /* UTF-8 => UTF-16 => encoding */
        ucnv_convertEx(cd->uconv, cd->utf8, &dst, dst + *outlen,
                       &src, src + *inlen, cd->pivot_buf,
                       &cd->pivot_source, &cd->pivot_target,
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &status);
    }

    /* the cursors were advanced by the converter */
    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (U_SUCCESS(status)) {
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
        if (flush) {
            cd->pivot_target = cd->pivot_buf;
            cd->pivot_source = cd->pivot_target;
        }
        return 0;
    }

    switch (status) {
        case U_BUFFER_OVERFLOW_ERROR:
            return -1;
        case U_INVALID_CHAR_FOUND:
        case U_ILLEGAL_CHAR_FOUND:
            return -2;
        default:
            return -3;
    }
}
#endif /* LIBXML_ICU_ENABLED */
1933 /************************************************************************
1935 * The real API used by libxml for on-the-fly conversion *
1937 ************************************************************************/
1940 * xmlEncInputChunk:
1941 * @handler: encoding handler
1942 * @out: a pointer to an array of bytes to store the result
1943 * @outlen: the length of @out
1944 * @in: a pointer to an array of input bytes
1945 * @inlen: the length of @in
1946 * @flush: flush (ICU-related)
1948 * Returns 0 if success, or
1949 * -1 by lack of space, or
1950 * -2 if the transcoding fails (for *in is not valid utf8 string or
1951 * the result of transformation can't fit into the encoding we want), or
1952 * -3 if there the last byte can't form a single output char.
1954 * The value of @inlen after return is the number of octets consumed
1955 * as the return value is 0, else unpredictable.
1956 * The value of @outlen after return is the number of octets produced.
1958 static int
1959 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1960 int *outlen, const unsigned char *in, int *inlen, int flush) {
1961 int ret;
1962 (void)flush;
1964 if (handler->input != NULL) {
1965 ret = handler->input(out, outlen, in, inlen);
1966 if (ret > 0)
1967 ret = 0;
1969 #ifdef LIBXML_ICONV_ENABLED
1970 else if (handler->iconv_in != NULL) {
1971 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1973 #endif /* LIBXML_ICONV_ENABLED */
1974 #ifdef LIBXML_ICU_ENABLED
1975 else if (handler->uconv_in != NULL) {
1976 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1977 flush);
1979 #endif /* LIBXML_ICU_ENABLED */
1980 else {
1981 *outlen = 0;
1982 *inlen = 0;
1983 ret = -2;
1986 return(ret);
1990 * xmlEncOutputChunk:
1991 * @handler: encoding handler
1992 * @out: a pointer to an array of bytes to store the result
1993 * @outlen: the length of @out
1994 * @in: a pointer to an array of input bytes
1995 * @inlen: the length of @in
1997 * Returns 0 if success, or
1998 * -1 by lack of space, or
1999 * -2 if the transcoding fails (for *in is not valid utf8 string or
2000 * the result of transformation can't fit into the encoding we want), or
2001 * -3 if there the last byte can't form a single output char.
2002 * -4 if no output function was found.
2004 * The value of @inlen after return is the number of octets consumed
2005 * as the return value is 0, else unpredictable.
2006 * The value of @outlen after return is the number of octets produced.
2008 static int
2009 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2010 int *outlen, const unsigned char *in, int *inlen) {
2011 int ret;
2013 if (handler->output != NULL) {
2014 ret = handler->output(out, outlen, in, inlen);
2015 if (ret > 0)
2016 ret = 0;
2018 #ifdef LIBXML_ICONV_ENABLED
2019 else if (handler->iconv_out != NULL) {
2020 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2022 #endif /* LIBXML_ICONV_ENABLED */
2023 #ifdef LIBXML_ICU_ENABLED
2024 else if (handler->uconv_out != NULL) {
2025 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2028 #endif /* LIBXML_ICU_ENABLED */
2029 else {
2030 *outlen = 0;
2031 *inlen = 0;
2032 ret = -4;
2035 return(ret);
2039 * xmlCharEncFirstLineInt:
2040 * @handler: char encoding transformation data structure
2041 * @out: an xmlBuffer for the output.
2042 * @in: an xmlBuffer for the input
2043 * @len: number of bytes to convert for the first line, or -1
2045 * Front-end for the encoding handler input function, but handle only
2046 * the very first line, i.e. limit itself to 45 chars.
2048 * Returns the number of byte written if success, or
2049 * -1 general error
2050 * -2 if the transcoding fails (for *in is not valid utf8 string or
2051 * the result of transformation can't fit into the encoding we want), or
2054 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2055 xmlBufferPtr in, int len) {
2056 int ret;
2057 int written;
2058 int toconv;
2060 if (handler == NULL) return(-1);
2061 if (out == NULL) return(-1);
2062 if (in == NULL) return(-1);
2064 /* calculate space available */
2065 written = out->size - out->use - 1; /* count '\0' */
2066 toconv = in->use;
2068 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2069 * 45 chars should be sufficient to reach the end of the encoding
2070 * declaration without going too far inside the document content.
2071 * on UTF-16 this means 90bytes, on UCS4 this means 180
2072 * The actual value depending on guessed encoding is passed as @len
2073 * if provided
2075 if (len >= 0) {
2076 if (toconv > len)
2077 toconv = len;
2078 } else {
2079 if (toconv > 180)
2080 toconv = 180;
2082 if (toconv * 2 >= written) {
2083 xmlBufferGrow(out, toconv * 2);
2084 written = out->size - out->use - 1;
2087 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2088 in->content, &toconv, 0);
2089 xmlBufferShrink(in, toconv);
2090 out->use += written;
2091 out->content[out->use] = 0;
2092 if (ret == -1) ret = -3;
2094 #ifdef DEBUG_ENCODING
2095 switch (ret) {
2096 case 0:
2097 xmlGenericError(xmlGenericErrorContext,
2098 "converted %d bytes to %d bytes of input\n",
2099 toconv, written);
2100 break;
2101 case -1:
2102 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2103 toconv, written, in->use);
2104 break;
2105 case -2:
2106 xmlGenericError(xmlGenericErrorContext,
2107 "input conversion failed due to input error\n");
2108 break;
2109 case -3:
2110 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2111 toconv, written, in->use);
2112 break;
2113 default:
2114 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2116 #endif /* DEBUG_ENCODING */
2118 * Ignore when input buffer is not on a boundary
2120 if (ret == -3) ret = 0;
2121 if (ret == -1) ret = 0;
2122 return(written ? written : ret);
2126 * xmlCharEncFirstLine:
2127 * @handler: char encoding transformation data structure
2128 * @out: an xmlBuffer for the output.
2129 * @in: an xmlBuffer for the input
2131 * Front-end for the encoding handler input function, but handle only
2132 * the very first line, i.e. limit itself to 45 chars.
2134 * Returns the number of byte written if success, or
2135 * -1 general error
2136 * -2 if the transcoding fails (for *in is not valid utf8 string or
2137 * the result of transformation can't fit into the encoding we want), or
2140 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2141 xmlBufferPtr in) {
2142 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2146 * xmlCharEncFirstLineInput:
2147 * @input: a parser input buffer
2148 * @len: number of bytes to convert for the first line, or -1
2150 * Front-end for the encoding handler input function, but handle only
2151 * the very first line. Point is that this is based on autodetection
2152 * of the encoding and once that first line is converted we may find
2153 * out that a different decoder is needed to process the input.
2155 * Returns the number of byte written if success, or
2156 * -1 general error
2157 * -2 if the transcoding fails (for *in is not valid utf8 string or
2158 * the result of transformation can't fit into the encoding we want), or
2161 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2163 int ret;
2164 size_t written;
2165 size_t toconv;
2166 int c_in;
2167 int c_out;
2168 xmlBufPtr in;
2169 xmlBufPtr out;
2171 if ((input == NULL) || (input->encoder == NULL) ||
2172 (input->buffer == NULL) || (input->raw == NULL))
2173 return (-1);
2174 out = input->buffer;
2175 in = input->raw;
2177 toconv = xmlBufUse(in);
2178 if (toconv == 0)
2179 return (0);
2180 written = xmlBufAvail(out) - 1; /* count '\0' */
2182 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2183 * 45 chars should be sufficient to reach the end of the encoding
2184 * declaration without going too far inside the document content.
2185 * on UTF-16 this means 90bytes, on UCS4 this means 180
2186 * The actual value depending on guessed encoding is passed as @len
2187 * if provided
2189 if (len >= 0) {
2190 if (toconv > (unsigned int) len)
2191 toconv = len;
2192 } else {
2193 if (toconv > 180)
2194 toconv = 180;
2196 if (toconv * 2 >= written) {
2197 xmlBufGrow(out, toconv * 2);
2198 written = xmlBufAvail(out) - 1;
2200 if (written > 360)
2201 written = 360;
2203 c_in = toconv;
2204 c_out = written;
2205 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2206 xmlBufContent(in), &c_in, 0);
2207 xmlBufShrink(in, c_in);
2208 xmlBufAddLen(out, c_out);
2209 if (ret == -1)
2210 ret = -3;
2212 switch (ret) {
2213 case 0:
2214 #ifdef DEBUG_ENCODING
2215 xmlGenericError(xmlGenericErrorContext,
2216 "converted %d bytes to %d bytes of input\n",
2217 c_in, c_out);
2218 #endif
2219 break;
2220 case -1:
2221 #ifdef DEBUG_ENCODING
2222 xmlGenericError(xmlGenericErrorContext,
2223 "converted %d bytes to %d bytes of input, %d left\n",
2224 c_in, c_out, (int)xmlBufUse(in));
2225 #endif
2226 break;
2227 case -3:
2228 #ifdef DEBUG_ENCODING
2229 xmlGenericError(xmlGenericErrorContext,
2230 "converted %d bytes to %d bytes of input, %d left\n",
2231 c_in, c_out, (int)xmlBufUse(in));
2232 #endif
2233 break;
2234 case -2: {
2235 char buf[50];
2236 const xmlChar *content = xmlBufContent(in);
2238 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2239 content[0], content[1],
2240 content[2], content[3]);
2241 buf[49] = 0;
2242 xmlEncodingErr(XML_I18N_CONV_FAILED,
2243 "input conversion failed due to input error, bytes %s\n",
2244 buf);
2248 * Ignore when input buffer is not on a boundary
2250 if (ret == -3) ret = 0;
2251 if (ret == -1) ret = 0;
2252 return(c_out ? c_out : ret);
2256 * xmlCharEncInput:
2257 * @input: a parser input buffer
2258 * @flush: try to flush all the raw buffer
2260 * Generic front-end for the encoding handler on parser input
2262 * Returns the number of byte written if success, or
2263 * -1 general error
2264 * -2 if the transcoding fails (for *in is not valid utf8 string or
2265 * the result of transformation can't fit into the encoding we want), or
2268 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2270 int ret;
2271 size_t written;
2272 size_t toconv;
2273 int c_in;
2274 int c_out;
2275 xmlBufPtr in;
2276 xmlBufPtr out;
2278 if ((input == NULL) || (input->encoder == NULL) ||
2279 (input->buffer == NULL) || (input->raw == NULL))
2280 return (-1);
2281 out = input->buffer;
2282 in = input->raw;
2284 toconv = xmlBufUse(in);
2285 if (toconv == 0)
2286 return (0);
2287 if ((toconv > 64 * 1024) && (flush == 0))
2288 toconv = 64 * 1024;
2289 written = xmlBufAvail(out);
2290 if (written > 0)
2291 written--; /* count '\0' */
2292 if (toconv * 2 >= written) {
2293 xmlBufGrow(out, toconv * 2);
2294 written = xmlBufAvail(out);
2295 if (written > 0)
2296 written--; /* count '\0' */
2298 if ((written > 128 * 1024) && (flush == 0))
2299 written = 128 * 1024;
2301 c_in = toconv;
2302 c_out = written;
2303 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2304 xmlBufContent(in), &c_in, flush);
2305 xmlBufShrink(in, c_in);
2306 xmlBufAddLen(out, c_out);
2307 if (ret == -1)
2308 ret = -3;
2310 switch (ret) {
2311 case 0:
2312 #ifdef DEBUG_ENCODING
2313 xmlGenericError(xmlGenericErrorContext,
2314 "converted %d bytes to %d bytes of input\n",
2315 c_in, c_out);
2316 #endif
2317 break;
2318 case -1:
2319 #ifdef DEBUG_ENCODING
2320 xmlGenericError(xmlGenericErrorContext,
2321 "converted %d bytes to %d bytes of input, %d left\n",
2322 c_in, c_out, (int)xmlBufUse(in));
2323 #endif
2324 break;
2325 case -3:
2326 #ifdef DEBUG_ENCODING
2327 xmlGenericError(xmlGenericErrorContext,
2328 "converted %d bytes to %d bytes of input, %d left\n",
2329 c_in, c_out, (int)xmlBufUse(in));
2330 #endif
2331 break;
2332 case -2: {
2333 char buf[50];
2334 const xmlChar *content = xmlBufContent(in);
2336 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2337 content[0], content[1],
2338 content[2], content[3]);
2339 buf[49] = 0;
2340 xmlEncodingErr(XML_I18N_CONV_FAILED,
2341 "input conversion failed due to input error, bytes %s\n",
2342 buf);
2346 * Ignore when input buffer is not on a boundary
2348 if (ret == -3)
2349 ret = 0;
2350 return (c_out? c_out : ret);
2354 * xmlCharEncInFunc:
2355 * @handler: char encoding transformation data structure
2356 * @out: an xmlBuffer for the output.
2357 * @in: an xmlBuffer for the input
2359 * Generic front-end for the encoding handler input function
2361 * Returns the number of byte written if success, or
2362 * -1 general error
2363 * -2 if the transcoding fails (for *in is not valid utf8 string or
2364 * the result of transformation can't fit into the encoding we want), or
2367 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2368 xmlBufferPtr in)
2370 int ret;
2371 int written;
2372 int toconv;
2374 if (handler == NULL)
2375 return (-1);
2376 if (out == NULL)
2377 return (-1);
2378 if (in == NULL)
2379 return (-1);
2381 toconv = in->use;
2382 if (toconv == 0)
2383 return (0);
2384 written = out->size - out->use -1; /* count '\0' */
2385 if (toconv * 2 >= written) {
2386 xmlBufferGrow(out, out->size + toconv * 2);
2387 written = out->size - out->use - 1;
2389 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2390 in->content, &toconv, 1);
2391 xmlBufferShrink(in, toconv);
2392 out->use += written;
2393 out->content[out->use] = 0;
2394 if (ret == -1)
2395 ret = -3;
2397 switch (ret) {
2398 case 0:
2399 #ifdef DEBUG_ENCODING
2400 xmlGenericError(xmlGenericErrorContext,
2401 "converted %d bytes to %d bytes of input\n",
2402 toconv, written);
2403 #endif
2404 break;
2405 case -1:
2406 #ifdef DEBUG_ENCODING
2407 xmlGenericError(xmlGenericErrorContext,
2408 "converted %d bytes to %d bytes of input, %d left\n",
2409 toconv, written, in->use);
2410 #endif
2411 break;
2412 case -3:
2413 #ifdef DEBUG_ENCODING
2414 xmlGenericError(xmlGenericErrorContext,
2415 "converted %d bytes to %d bytes of input, %d left\n",
2416 toconv, written, in->use);
2417 #endif
2418 break;
2419 case -2: {
2420 char buf[50];
2422 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2423 in->content[0], in->content[1],
2424 in->content[2], in->content[3]);
2425 buf[49] = 0;
2426 xmlEncodingErr(XML_I18N_CONV_FAILED,
2427 "input conversion failed due to input error, bytes %s\n",
2428 buf);
2432 * Ignore when input buffer is not on a boundary
2434 if (ret == -3)
2435 ret = 0;
2436 return (written? written : ret);
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output
 * a first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * Returns the number of byte written if success, or
 *     -1 general error
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return (-1);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);
    if (written > 0)
        written--; /* count '\0' */

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        c_in = 0;
        c_out = written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                        "initialized encoder\n");
#endif
        return(c_out);
    }

    /*
     * Conversion itself.
     */
    toconv = xmlBufUse(in);
    if (toconv == 0)
        return (writtentot);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    if (toconv * 4 >= written) {
        xmlBufGrow(out, toconv * 4);
        /*
         * Same guarded decrement as above: @written is a size_t and
         * must not wrap around when no room is available.
         */
        written = xmlBufAvail(out);
        if (written > 0)
            written--; /* count '\0' */
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;
    if (ret == -1) {
        if (c_out > 0) {
            /* Can be a limitation of iconv or uconv */
            goto retry;
        }
        ret = -3;
    }

    /*
     * Attempt to handle error cases
     */
    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of output\n",
                    c_in, c_out);
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                    c_in, c_out, (int) xmlBufUse(in));
#endif
            break;
        case -4:
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
                           "xmlCharEncOutFunc: no output function !\n", NULL);
            ret = -1;
            break;
        case -2: {
            xmlChar charref[20];
            int len = (int) xmlBufUse(in);
            xmlChar *content = xmlBufContent(in);
            int cur, charrefLen;

            cur = xmlGetUTF8Char(content, &len);
            if (cur <= 0)
                break;

#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "handling output conversion error\n");
            xmlGenericError(xmlGenericErrorContext,
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    content[0], content[1],
                    content[2], content[3]);
#endif
            /*
             * Removes the UTF8 sequence, and replace it by a charref
             * and continue the transcoding phase, hoping the error
             * did not mangle the encoder state.
             */
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                             "&#%d;", cur);
            xmlBufShrink(in, len);
            xmlBufGrow(out, charrefLen * 4);
            /* guarded decrement, see above */
            written = xmlBufAvail(out);
            if (written > 0)
                written--; /* count '\0' */
            c_out = written;
            c_in = charrefLen;
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                    charref, &c_in);

            if ((ret < 0) || (c_in != charrefLen)) {
                char buf[50];

                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                         content[0], content[1],
                         content[2], content[3]);
                buf[49] = 0;
                xmlEncodingErr(XML_I18N_CONV_FAILED,
                    "output conversion failed due to conv error, bytes %s\n",
                               buf);
                if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
                    content[0] = ' ';
                break;
            }

            xmlBufAddLen(out, c_out);
            writtentot += c_out;
            goto retry;
        }
    }
    return(writtentot ? writtentot : ret);
}
#endif
2613 * xmlCharEncOutFunc:
2614 * @handler: char encoding transformation data structure
2615 * @out: an xmlBuffer for the output.
2616 * @in: an xmlBuffer for the input
2618 * Generic front-end for the encoding handler output function
2619 * a first call with @in == NULL has to be made firs to initiate the
2620 * output in case of non-stateless encoding needing to initiate their
2621 * state or the output (like the BOM in UTF16).
2622 * In case of UTF8 sequence conversion errors for the given encoder,
2623 * the content will be automatically remapped to a CharRef sequence.
2625 * Returns the number of byte written if success, or
2626 * -1 general error
2627 * -2 if the transcoding fails (for *in is not valid utf8 string or
2628 * the result of transformation can't fit into the encoding we want), or
2631 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2632 xmlBufferPtr in) {
2633 int ret;
2634 int written;
2635 int writtentot = 0;
2636 int toconv;
2638 if (handler == NULL) return(-1);
2639 if (out == NULL) return(-1);
2641 retry:
2643 written = out->size - out->use;
2645 if (written > 0)
2646 written--; /* Gennady: count '/0' */
2649 * First specific handling of in = NULL, i.e. the initialization call
2651 if (in == NULL) {
2652 toconv = 0;
2653 /* TODO: Check return value. */
2654 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2655 NULL, &toconv);
2656 out->use += written;
2657 out->content[out->use] = 0;
2658 #ifdef DEBUG_ENCODING
2659 xmlGenericError(xmlGenericErrorContext,
2660 "initialized encoder\n");
2661 #endif
2662 return(0);
2666 * Conversion itself.
2668 toconv = in->use;
2669 if (toconv == 0)
2670 return(0);
2671 if (toconv * 4 >= written) {
2672 xmlBufferGrow(out, toconv * 4);
2673 written = out->size - out->use - 1;
2675 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2676 in->content, &toconv);
2677 xmlBufferShrink(in, toconv);
2678 out->use += written;
2679 writtentot += written;
2680 out->content[out->use] = 0;
2681 if (ret == -1) {
2682 if (written > 0) {
2683 /* Can be a limitation of iconv or uconv */
2684 goto retry;
2686 ret = -3;
2690 * Attempt to handle error cases
2692 switch (ret) {
2693 case 0:
2694 #ifdef DEBUG_ENCODING
2695 xmlGenericError(xmlGenericErrorContext,
2696 "converted %d bytes to %d bytes of output\n",
2697 toconv, written);
2698 #endif
2699 break;
2700 case -1:
2701 #ifdef DEBUG_ENCODING
2702 xmlGenericError(xmlGenericErrorContext,
2703 "output conversion failed by lack of space\n");
2704 #endif
2705 break;
2706 case -3:
2707 #ifdef DEBUG_ENCODING
2708 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2709 toconv, written, in->use);
2710 #endif
2711 break;
2712 case -4:
2713 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2714 "xmlCharEncOutFunc: no output function !\n", NULL);
2715 ret = -1;
2716 break;
2717 case -2: {
2718 xmlChar charref[20];
2719 int len = in->use;
2720 const xmlChar *utf = (const xmlChar *) in->content;
2721 int cur, charrefLen;
2723 cur = xmlGetUTF8Char(utf, &len);
2724 if (cur <= 0)
2725 break;
2727 #ifdef DEBUG_ENCODING
2728 xmlGenericError(xmlGenericErrorContext,
2729 "handling output conversion error\n");
2730 xmlGenericError(xmlGenericErrorContext,
2731 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2732 in->content[0], in->content[1],
2733 in->content[2], in->content[3]);
2734 #endif
2736 * Removes the UTF8 sequence, and replace it by a charref
2737 * and continue the transcoding phase, hoping the error
2738 * did not mangle the encoder state.
2740 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2741 "&#%d;", cur);
2742 xmlBufferShrink(in, len);
2743 xmlBufferGrow(out, charrefLen * 4);
2744 written = out->size - out->use - 1;
2745 toconv = charrefLen;
2746 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2747 charref, &toconv);
2749 if ((ret < 0) || (toconv != charrefLen)) {
2750 char buf[50];
2752 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2753 in->content[0], in->content[1],
2754 in->content[2], in->content[3]);
2755 buf[49] = 0;
2756 xmlEncodingErr(XML_I18N_CONV_FAILED,
2757 "output conversion failed due to conv error, bytes %s\n",
2758 buf);
2759 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2760 in->content[0] = ' ';
2761 break;
2764 out->use += written;
2765 writtentot += written;
2766 out->content[out->use] = 0;
2767 goto retry;
2770 return(writtentot ? writtentot : ret);
2774 * xmlCharEncCloseFunc:
2775 * @handler: char encoding transformation data structure
2777 * Generic front-end for encoding handler close function
2779 * Returns 0 if success, or -1 in case of error
2782 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2783 int ret = 0;
2784 int tofree = 0;
2785 int i, handler_in_list = 0;
2787 if (handler == NULL) return(-1);
2788 if (handler->name == NULL) return(-1);
2789 if (handlers != NULL) {
2790 for (i = 0;i < nbCharEncodingHandler; i++) {
2791 if (handler == handlers[i]) {
2792 handler_in_list = 1;
2793 break;
2797 #ifdef LIBXML_ICONV_ENABLED
2799 * Iconv handlers can be used only once, free the whole block.
2800 * and the associated icon resources.
2802 if ((handler_in_list == 0) &&
2803 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2804 tofree = 1;
2805 if (handler->iconv_out != NULL) {
2806 if (iconv_close(handler->iconv_out))
2807 ret = -1;
2808 handler->iconv_out = NULL;
2810 if (handler->iconv_in != NULL) {
2811 if (iconv_close(handler->iconv_in))
2812 ret = -1;
2813 handler->iconv_in = NULL;
2816 #endif /* LIBXML_ICONV_ENABLED */
2817 #ifdef LIBXML_ICU_ENABLED
2818 if ((handler_in_list == 0) &&
2819 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2820 tofree = 1;
2821 if (handler->uconv_out != NULL) {
2822 closeIcuConverter(handler->uconv_out);
2823 handler->uconv_out = NULL;
2825 if (handler->uconv_in != NULL) {
2826 closeIcuConverter(handler->uconv_in);
2827 handler->uconv_in = NULL;
2830 #endif
2831 if (tofree) {
2832 /* free up only dynamic handlers iconv/uconv */
2833 if (handler->name != NULL)
2834 xmlFree(handler->name);
2835 handler->name = NULL;
2836 xmlFree(handler);
2838 #ifdef DEBUG_ENCODING
2839 if (ret)
2840 xmlGenericError(xmlGenericErrorContext,
2841 "failed to close the encoding handler\n");
2842 else
2843 xmlGenericError(xmlGenericErrorContext,
2844 "closed the encoding handler\n");
2845 #endif
2847 return(ret);
2851 * xmlByteConsumed:
2852 * @ctxt: an XML parser context
2854 * This function provides the current index of the parser relative
2855 * to the start of the current entity. This function is computed in
2856 * bytes from the beginning starting at zero and finishing at the
2857 * size in byte of the file if parsing a file. The function is
2858 * of constant cost if the input is UTF-8 but can be costly if run
2859 * on non-UTF-8 input.
2861 * Returns the index in bytes from the beginning of the entity or -1
2862 * in case the index could not be computed.
2864 long
2865 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2866 xmlParserInputPtr in;
2868 if (ctxt == NULL) return(-1);
2869 in = ctxt->input;
2870 if (in == NULL) return(-1);
2871 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2872 unsigned int unused = 0;
2873 xmlCharEncodingHandler * handler = in->buf->encoder;
2875 * Encoding conversion, compute the number of unused original
2876 * bytes from the input not consumed and subtract that from
2877 * the raw consumed value, this is not a cheap operation
2879 if (in->end - in->cur > 0) {
2880 unsigned char convbuf[32000];
2881 const unsigned char *cur = (const unsigned char *)in->cur;
2882 int toconv = in->end - in->cur, written = 32000;
2884 int ret;
2886 do {
2887 toconv = in->end - cur;
2888 written = 32000;
2889 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2890 cur, &toconv);
2891 if (ret < 0) {
2892 if (written > 0)
2893 ret = -2;
2894 else
2895 return(-1);
2897 unused += written;
2898 cur += toconv;
2899 } while (ret == -2);
2901 if (in->buf->rawconsumed < unused)
2902 return(-1);
2903 return(in->buf->rawconsumed - unused);
2905 return(in->consumed + (in->cur - in->base));
2908 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2909 #ifdef LIBXML_ISO8859X_ENABLED
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are stored: when @out is
 * full the conversion stops before the character that does not fit and
 * the call still counts as a success.
 *
 * @xlattable is read as follows: entries 0..31 are selected by the low
 * 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a 3-byte
 * lead, and each selected value picks a 64-entry block located at
 * offset 48 + 64 * value. A final value of 0 means "no equivalent".
 *
 * Returns the number of bytes written to @out if success,
 *     -2 if the transcoding fails (invalid UTF-8 or no equivalent in
 *        the target set),
 *     -3 if @in ends in the middle of a multi-byte sequence, or
 *     -1 otherwise
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen,
               unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d = *in++;

        if (d < 0x80) {
            /* ASCII goes through unchanged */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }

        /*
         * Never write past the caller's buffer; @processed still points
         * at the start of this character, so it is not reported as
         * consumed and can be converted by a later call.
         */
        if (out >= outend)
            break;
        *out++ = d;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: code points for the bytes 0x80..0xFF, indexed by
 *                (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops when @in is exhausted or @out
 * has no room for the next character.
 *
 * Returns the number of bytes written to @out if success, or -1 if an
 * argument is NULL or an input byte has no entry in @unicodetable
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen,
               unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: runs while at least 3 output bytes are free, the most
     * a single input byte can expand to. Written as a distance so that
     * no pointer before the start of @out is formed for tiny buffers.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* an ASCII run may not exceed the room left in @out */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds test first: *in must not be read at or past instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two ASCII bytes still fit in the space the loop left over */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3097 /************************************************************************
3098 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3099 ************************************************************************/
/*
 * ISO-8859-2 upper half: entry i holds the Unicode code point of byte
 * 0x80 + i. Presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80).
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    /* 0x80 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    /* 0xb0 */
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    /* 0xc0 */
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    /* 0xd0 */
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    /* 0xe0 */
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    /* 0xf0 */
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
/*
 * Unicode to ISO-8859-2, two-level table. Presumably handed to
 * UTF8ToISO8859x() as @xlattable, which reads it this way: the first
 * 48 bytes select a block from the lead byte (0..31 for 2-byte leads,
 * 32..47 for 3-byte leads), then come six 64-byte blocks; a result of
 * 0 means "not in character set".
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    /* block selection, 2-byte leads */
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block selection, 3-byte leads */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    /* block 5 */
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
/*
 * ISO-8859-3 upper half: entry i holds the Unicode code point of byte
 * 0x80 + i, with 0x0000 where the table defines no character.
 * Presumably handed to ISO8859xToUTF8() as @unicodetable, which indexes
 * it with (byte - 0x80) and rejects a 0 entry.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    /* 0x80 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    /* 0xb0 */
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    /* 0xc0 */
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    /* 0xd0 */
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    /* 0xe0 */
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    /* 0xf0 */
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
/*
 * Unicode to ISO-8859-3, two-level table. Presumably handed to
 * UTF8ToISO8859x() as @xlattable, which reads it this way: the first
 * 48 bytes select a block from the lead byte (0..31 for 2-byte leads,
 * 32..47 for 3-byte leads), then come seven 64-byte blocks; a result
 * of 0 means "not in character set".
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    /* block selection, 2-byte leads */
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block selection, 3-byte leads */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    /* block 2 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    /* block 6 */
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
/*
 * ISO-8859-4 upper half: entry i holds the Unicode code point of byte
 * 0x80 + i. Presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80).
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    /* 0x80 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    /* 0xb0 */
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    /* 0xc0 */
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    /* 0xd0 */
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    /* 0xe0 */
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    /* 0xf0 */
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
/*
 * Unicode to ISO-8859-4, two-level table. Presumably handed to
 * UTF8ToISO8859x() as @xlattable, which reads it this way: the first
 * 48 bytes select a block from the lead byte (0..31 for 2-byte leads,
 * 32..47 for 3-byte leads), then come six 64-byte blocks; a result of
 * 0 means "not in character set".
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    /* block selection, 2-byte leads */
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block selection, 3-byte leads */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    /* block 4 */
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 5 */
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3252 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3253 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3254 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3255 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3256 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3257 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3258 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3259 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3260 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3261 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3262 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3263 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3264 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3265 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3266 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3267 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3268 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3271 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3272 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3280 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3281 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3282 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3283 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3284 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3285 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3286 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3287 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3288 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3302 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3303 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3304 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3305 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3306 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3307 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3308 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3309 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3310 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3311 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3312 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3313 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3314 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3315 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3316 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3317 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3320 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3321 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3329 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3330 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3337 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3338 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3339 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3340 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3341 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3347 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3348 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3349 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3350 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3351 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3352 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3353 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3354 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3355 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3356 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3357 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3358 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3359 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3360 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3361 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3362 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3365 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3366 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3374 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3375 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3376 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3377 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3390 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3391 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3392 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3393 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3394 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3400 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3401 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3402 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3403 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3404 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3405 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3406 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3407 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3408 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3409 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3410 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3411 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3412 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3413 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3414 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3415 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3418 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3419 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3421 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3427 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3428 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3429 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3430 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3436 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3443 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3448 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3449 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3453 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3454 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3455 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3456 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3457 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3458 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3459 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3460 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3461 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3462 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3463 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3464 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3465 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3466 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3467 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3468 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3471 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3472 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3480 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3481 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3482 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3483 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3484 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3485 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3486 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3487 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3498 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3499 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3500 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3501 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3502 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3503 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3504 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3505 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3506 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3507 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3508 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3509 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3510 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3511 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3512 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3513 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3516 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3517 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3523 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3524 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3525 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3526 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3527 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3529 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3530 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3531 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3532 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3535 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3536 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3545 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3546 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3547 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3550 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3551 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3552 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3553 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3554 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3555 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3556 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3557 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3558 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3559 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3560 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3561 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3562 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3563 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3564 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3565 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3566 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3569 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3570 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3578 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3579 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3585 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3586 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3587 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3588 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3589 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3594 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3595 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3596 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3600 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3601 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3602 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3603 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3604 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3605 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3606 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3607 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3608 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3609 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3610 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3611 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3612 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3613 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3614 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3615 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3618 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3619 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3627 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3628 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3629 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3630 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3639 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3640 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3641 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3642 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3643 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3644 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3645 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3646 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3647 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3648 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3649 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3652 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3653 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3654 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3655 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3656 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3657 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3658 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3659 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3660 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3661 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3662 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3663 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3664 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3665 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3666 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3667 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3668 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3671 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3672 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3680 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3681 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3682 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3687 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3704 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3706 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3707 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3709 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3712 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3713 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3714 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
/*
 * ISO-8859-15 -> Unicode table: 128 unsigned short entries, passed to
 * ISO8859xToUTF8 by ISO8859_15ToUTF8.
 * NOTE(review): presumably entry i is the code point for input byte
 * 0x80 + i; the lookup itself is done by ISO8859xToUTF8 -- confirm there.
 * Every entry equals 0x0080 + i except these eight:
 * 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153, 0x0178.
 */
3717 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3718 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3719 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3720 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3721 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3722 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3723 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3724 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3725 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3726 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3727 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3728 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3729 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3730 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3731 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3732 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3733 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * UTF-8 -> ISO-8859-15 lookup data, passed to UTF8ToISO8859x by
 * UTF8ToISO8859_15.
 * The initializer is 27 adjacent string literals of 16 bytes each, which
 * the compiler concatenates into exactly 48 + 6 * 64 = 432 bytes; the
 * array has no room for the literal's terminating NUL, so it is not stored.
 * NOTE(review): the size expression suggests a 48-byte first-level index
 * followed by six 64-byte blocks, but the actual lookup scheme is defined
 * by UTF8ToISO8859x -- confirm there before editing any byte.
 */
3736 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3737 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3745 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3746 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3747 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3748 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3760 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3761 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3762 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3763 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * ISO-8859-16 -> Unicode table: 128 unsigned short entries, passed to
 * ISO8859xToUTF8 by ISO8859_16ToUTF8.
 * NOTE(review): presumably entry i is the code point for input byte
 * 0x80 + i; the lookup itself is done by ISO8859xToUTF8 -- confirm there.
 * The first four rows (0x0080..0x009f) and 0x00a0 map to themselves; many
 * of the remaining entries map outside the 0x00a1..0x00ff range.
 */
3766 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3767 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3768 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3769 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3770 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3771 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3772 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3773 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3774 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3775 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3776 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3777 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3778 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3779 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3780 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3781 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3782 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
/*
 * UTF-8 -> ISO-8859-16 lookup data, passed to UTF8ToISO8859x by
 * UTF8ToISO8859_16.
 * The initializer is 39 adjacent string literals of 16 bytes each, which
 * the compiler concatenates into exactly 48 + 9 * 64 = 624 bytes; the
 * array has no room for the literal's terminating NUL, so it is not stored.
 * NOTE(review): the size expression suggests a 48-byte first-level index
 * followed by nine 64-byte blocks, but the actual lookup scheme is defined
 * by UTF8ToISO8859x -- confirm there before editing any byte.
 */
3785 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3786 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3794 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3795 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3796 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3797 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3798 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3801 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3803 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3804 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3805 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3811 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3812 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3814 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3815 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3818 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3819 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3820 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3821 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3822 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3823 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3824 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
/*
 * Auto-generated wrapper functions for ISO-8859-2 .. ISO-8859-16
 * (there is no ISO-8859-12 wrapper in this list).
 */
3832 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3833 const unsigned char* in, int *inlen) {
3834 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3836 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3837 const unsigned char* in, int *inlen) {
3838 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3841 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3842 const unsigned char* in, int *inlen) {
3843 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3845 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3846 const unsigned char* in, int *inlen) {
3847 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3850 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3851 const unsigned char* in, int *inlen) {
3852 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3854 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3855 const unsigned char* in, int *inlen) {
3856 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3859 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3860 const unsigned char* in, int *inlen) {
3861 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3863 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3864 const unsigned char* in, int *inlen) {
3865 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3868 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3869 const unsigned char* in, int *inlen) {
3870 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3872 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3873 const unsigned char* in, int *inlen) {
3874 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3877 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3878 const unsigned char* in, int *inlen) {
3879 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3881 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3882 const unsigned char* in, int *inlen) {
3883 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3886 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3887 const unsigned char* in, int *inlen) {
3888 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3890 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3891 const unsigned char* in, int *inlen) {
3892 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3895 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3896 const unsigned char* in, int *inlen) {
3897 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3899 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3900 const unsigned char* in, int *inlen) {
3901 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3904 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3905 const unsigned char* in, int *inlen) {
3906 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3908 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3909 const unsigned char* in, int *inlen) {
3910 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3913 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3914 const unsigned char* in, int *inlen) {
3915 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3917 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3918 const unsigned char* in, int *inlen) {
3919 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3922 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3923 const unsigned char* in, int *inlen) {
3924 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3926 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3927 const unsigned char* in, int *inlen) {
3928 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3931 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3932 const unsigned char* in, int *inlen) {
3933 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3935 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3936 const unsigned char* in, int *inlen) {
3937 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3940 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3941 const unsigned char* in, int *inlen) {
3942 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3944 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3945 const unsigned char* in, int *inlen) {
3946 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3949 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3950 const unsigned char* in, int *inlen) {
3951 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3953 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3954 const unsigned char* in, int *inlen) {
3955 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3958 static void
3959 xmlRegisterCharEncodingHandlersISO8859x (void) {
3960 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3961 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3962 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3963 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3964 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3965 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3966 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3967 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3968 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3969 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3970 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3971 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3972 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3973 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3976 #endif
3977 #endif
3979 #define bottom_encoding
3980 #include "elfgcchack.h"