libs/xml2/encoding.c

   1 /*
   2  * encoding.c : implements the encoding conversion functions needed for XML
   3  *
   4  * Related specs:
   5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
   6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
   7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
   8  * [ISO-8859-1]   ISO Latin-1 characters codes.
   9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
  10  *                Worldwide Character Encoding -- Version 1.0", Addison-
  11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
  12  *                described in Unicode Technical Report #4.
  13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
  14  *                Information Interchange, ANSI X3.4-1986.
  15  *
  16  * See Copyright for the status of this software.
  17  *
  18  * daniel@veillard.com
  19  *
  20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
  21  */
  22
  23 #define IN_LIBXML
  24 #include "libxml.h"
  25
  26 #include <string.h>
  27 #include <limits.h>
  28
  29 #ifdef HAVE_CTYPE_H
  30 #include <ctype.h>
  31 #endif
  32 #ifdef HAVE_STDLIB_H
  33 #include <stdlib.h>
  34 #endif
  35 #ifdef LIBXML_ICONV_ENABLED
  36 #ifdef HAVE_ERRNO_H
  37 #include <errno.h>
  38 #endif
  39 #endif
  40 #include <libxml/encoding.h>
  41 #include <libxml/xmlmemory.h>
  42 #ifdef LIBXML_HTML_ENABLED
  43 #include <libxml/HTMLparser.h>
  44 #endif
  45 #include <libxml/globals.h>
  46 #include <libxml/xmlerror.h>
  47
  48 #include "buf.h"
  49 #include "enc.h"
  50
  51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
  52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
  53
  54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
  55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
  56 struct _xmlCharEncodingAlias {
  57     const char *name;
  58     const char *alias;
  59 };
  60
  61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
  62 static int xmlCharEncodingAliasesNb = 0;
  63 static int xmlCharEncodingAliasesMax = 0;
  64
  65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
  66 #if 0
  67 #define DEBUG_ENCODING  /* Define this to get encoding traces */
  68 #endif
  69 #else
  70 #ifdef LIBXML_ISO8859X_ENABLED
  71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
  72 #endif
  73 #endif
  74
  75 static int xmlLittleEndian = 1;
  76
  77 /**
  78  * xmlEncodingErrMemory:
  79  * @extra:  extra information
  80  *
  81  * Handle an out of memory condition
  82  */
  83 static void
  84 xmlEncodingErrMemory(const char *extra)
  85 {
  86     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
  87 }
  88
  89 /**
  90  * xmlErrEncoding:
  91  * @error:  the error number
  92  * @msg:  the error message
  93  *
  94  * n encoding error
  95  */
  96 static void LIBXML_ATTR_FORMAT(2,0)
  97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
  98 {
  99     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
 100                     XML_FROM_I18N, error, XML_ERR_FATAL,
 101                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
 102 }
 103
 104 #ifdef LIBXML_ICU_ENABLED
 105 static uconv_t*
 106 openIcuConverter(const char* name, int toUnicode)
 107 {
 108   UErrorCode status = U_ZERO_ERROR;
 109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
 110   if (conv == NULL)
 111     return NULL;
 112
 113   conv->pivot_source = conv->pivot_buf;
 114   conv->pivot_target = conv->pivot_buf;
 115
 116   conv->uconv = ucnv_open(name, &status);
 117   if (U_FAILURE(status))
 118     goto error;
 119
 120   status = U_ZERO_ERROR;
 121   if (toUnicode) {
 122     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
 123                         NULL, NULL, NULL, &status);
 124   }
 125   else {
 126     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
 127                         NULL, NULL, NULL, &status);
 128   }
 129   if (U_FAILURE(status))
 130     goto error;
 131
 132   status = U_ZERO_ERROR;
 133   conv->utf8 = ucnv_open("UTF-8", &status);
 134   if (U_SUCCESS(status))
 135     return conv;
 136
 137 error:
 138   if (conv->uconv)
 139     ucnv_close(conv->uconv);
 140   xmlFree(conv);
 141   return NULL;
 142 }
 143
 144 static void
 145 closeIcuConverter(uconv_t *conv)
 146 {
 147   if (conv != NULL) {
 148     ucnv_close(conv->uconv);
 149     ucnv_close(conv->utf8);
 150     xmlFree(conv);
 151   }
 152 }
 153 #endif /* LIBXML_ICU_ENABLED */
 154
 155 /************************************************************************
 156  *                                                                      *
 157  *              Conversions To/From UTF8 encoding                       *
 158  *                                                                      *
 159  ************************************************************************/
 160
 161 /**
 162  * asciiToUTF8:
 163  * @out:  a pointer to an array of bytes to store the result
 164  * @outlen:  the length of @out
 165  * @in:  a pointer to an array of ASCII chars
 166  * @inlen:  the length of @in
 167  *
 168  * Take a block of ASCII chars in and try to convert it to an UTF-8
 169  * block of chars out.
 170  * Returns 0 if success, or -1 otherwise
 171  * The value of @inlen after return is the number of octets consumed
 172  *     if the return value is positive, else unpredictable.
 173  * The value of @outlen after return is the number of octets produced.
 174  */
 175 static int
 176 asciiToUTF8(unsigned char* out, int *outlen,
 177               const unsigned char* in, int *inlen) {
 178     unsigned char* outstart = out;
 179     const unsigned char* base = in;
 180     const unsigned char* processed = in;
 181     unsigned char* outend = out + *outlen;
 182     const unsigned char* inend;
 183     unsigned int c;
 184
 185     inend = in + (*inlen);
 186     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 187         c= *in++;
 188
 189         if (out >= outend)
 190             break;
 191         if (c < 0x80) {
 192             *out++ = c;
 193         } else {
 194             *outlen = out - outstart;
 195             *inlen = processed - base;
 196             return(-1);
 197         }
 198
 199         processed = (const unsigned char*) in;
 200     }
 201     *outlen = out - outstart;
 202     *inlen = processed - base;
 203     return(*outlen);
 204 }
 205
 206 #ifdef LIBXML_OUTPUT_ENABLED
 207 /**
 208  * UTF8Toascii:
 209  * @out:  a pointer to an array of bytes to store the result
 210  * @outlen:  the length of @out
 211  * @in:  a pointer to an array of UTF-8 chars
 212  * @inlen:  the length of @in
 213  *
 214  * Take a block of UTF-8 chars in and try to convert it to an ASCII
 215  * block of chars out.
 216  *
 217  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 218  * The value of @inlen after return is the number of octets consumed
 219  *     if the return value is positive, else unpredictable.
 220  * The value of @outlen after return is the number of octets produced.
 221  */
 222 static int
 223 UTF8Toascii(unsigned char* out, int *outlen,
 224               const unsigned char* in, int *inlen) {
 225     const unsigned char* processed = in;
 226     const unsigned char* outend;
 227     const unsigned char* outstart = out;
 228     const unsigned char* instart = in;
 229     const unsigned char* inend;
 230     unsigned int c, d;
 231     int trailing;
 232
 233     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 234     if (in == NULL) {
 235         /*
 236          * initialization nothing to do
 237          */
 238         *outlen = 0;
 239         *inlen = 0;
 240         return(0);
 241     }
 242     inend = in + (*inlen);
 243     outend = out + (*outlen);
 244     while (in < inend) {
 245         d = *in++;
 246         if      (d < 0x80)  { c= d; trailing= 0; }
 247         else if (d < 0xC0) {
 248             /* trailing byte in leading position */
 249             *outlen = out - outstart;
 250             *inlen = processed - instart;
 251             return(-2);
 252         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 253         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 254         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 255         else {
 256             /* no chance for this in Ascii */
 257             *outlen = out - outstart;
 258             *inlen = processed - instart;
 259             return(-2);
 260         }
 261
 262         if (inend - in < trailing) {
 263             break;
 264         }
 265
 266         for ( ; trailing; trailing--) {
 267             if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 268                 break;
 269             c <<= 6;
 270             c |= d & 0x3F;
 271         }
 272
 273         /* assertion: c is a single UTF-4 value */
 274         if (c < 0x80) {
 275             if (out >= outend)
 276                 break;
 277             *out++ = c;
 278         } else {
 279             /* no chance for this in Ascii */
 280             *outlen = out - outstart;
 281             *inlen = processed - instart;
 282             return(-2);
 283         }
 284         processed = in;
 285     }
 286     *outlen = out - outstart;
 287     *inlen = processed - instart;
 288     return(*outlen);
 289 }
 290 #endif /* LIBXML_OUTPUT_ENABLED */
 291
 292 /**
 293  * isolat1ToUTF8:
 294  * @out:  a pointer to an array of bytes to store the result
 295  * @outlen:  the length of @out
 296  * @in:  a pointer to an array of ISO Latin 1 chars
 297  * @inlen:  the length of @in
 298  *
 299  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 300  * block of chars out.
 301  * Returns the number of bytes written if success, or -1 otherwise
 302  * The value of @inlen after return is the number of octets consumed
 303  *     if the return value is positive, else unpredictable.
 304  * The value of @outlen after return is the number of octets produced.
 305  */
 306 int
 307 isolat1ToUTF8(unsigned char* out, int *outlen,
 308               const unsigned char* in, int *inlen) {
 309     unsigned char* outstart = out;
 310     const unsigned char* base = in;
 311     unsigned char* outend;
 312     const unsigned char* inend;
 313     const unsigned char* instop;
 314
 315     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
 316         return(-1);
 317
 318     outend = out + *outlen;
 319     inend = in + (*inlen);
 320     instop = inend;
 321
 322     while ((in < inend) && (out < outend - 1)) {
 323         if (*in >= 0x80) {
 324             *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
 325             *out++ = ((*in) & 0x3F) | 0x80;
 326             ++in;
 327         }
 328         if ((instop - in) > (outend - out)) instop = in + (outend - out);
 329         while ((in < instop) && (*in < 0x80)) {
 330             *out++ = *in++;
 331         }
 332     }
 333     if ((in < inend) && (out < outend) && (*in < 0x80)) {
 334         *out++ = *in++;
 335     }
 336     *outlen = out - outstart;
 337     *inlen = in - base;
 338     return(*outlen);
 339 }
 340
 341 /**
 342  * UTF8ToUTF8:
 343  * @out:  a pointer to an array of bytes to store the result
 344  * @outlen:  the length of @out
 345  * @inb:  a pointer to an array of UTF-8 chars
 346  * @inlenb:  the length of @in in UTF-8 chars
 347  *
 348  * No op copy operation for UTF8 handling.
 349  *
 350  * Returns the number of bytes written, or -1 if lack of space.
 351  *     The value of *inlen after return is the number of octets consumed
 352  *     if the return value is positive, else unpredictable.
 353  */
 354 static int
 355 UTF8ToUTF8(unsigned char* out, int *outlen,
 356            const unsigned char* inb, int *inlenb)
 357 {
 358     int len;
 359
 360     if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
 361         return(-1);
 362     if (inb == NULL) {
 363         /* inb == NULL means output is initialized. */
 364         *outlen = 0;
 365         *inlenb = 0;
 366         return(0);
 367     }
 368     if (*outlen > *inlenb) {
 369         len = *inlenb;
 370     } else {
 371         len = *outlen;
 372     }
 373     if (len < 0)
 374         return(-1);
 375
 376     /*
 377      * FIXME: Conversion functions must assure valid UTF-8, so we have
 378      * to check for UTF-8 validity. Preferably, this converter shouldn't
 379      * be used at all.
 380      */
 381     memcpy(out, inb, len);
 382
 383     *outlen = len;
 384     *inlenb = len;
 385     return(*outlen);
 386 }
 387
 388
 389 #ifdef LIBXML_OUTPUT_ENABLED
 390 /**
 391  * UTF8Toisolat1:
 392  * @out:  a pointer to an array of bytes to store the result
 393  * @outlen:  the length of @out
 394  * @in:  a pointer to an array of UTF-8 chars
 395  * @inlen:  the length of @in
 396  *
 397  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 398  * block of chars out.
 399  *
 400  * Returns the number of bytes written if success, -2 if the transcoding fails,
 401            or -1 otherwise
 402  * The value of @inlen after return is the number of octets consumed
 403  *     if the return value is positive, else unpredictable.
 404  * The value of @outlen after return is the number of octets produced.
 405  */
 406 int
 407 UTF8Toisolat1(unsigned char* out, int *outlen,
 408               const unsigned char* in, int *inlen) {
 409     const unsigned char* processed = in;
 410     const unsigned char* outend;
 411     const unsigned char* outstart = out;
 412     const unsigned char* instart = in;
 413     const unsigned char* inend;
 414     unsigned int c, d;
 415     int trailing;
 416
 417     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 418     if (in == NULL) {
 419         /*
 420          * initialization nothing to do
 421          */
 422         *outlen = 0;
 423         *inlen = 0;
 424         return(0);
 425     }
 426     inend = in + (*inlen);
 427     outend = out + (*outlen);
 428     while (in < inend) {
 429         d = *in++;
 430         if      (d < 0x80)  { c= d; trailing= 0; }
 431         else if (d < 0xC0) {
 432             /* trailing byte in leading position */
 433             *outlen = out - outstart;
 434             *inlen = processed - instart;
 435             return(-2);
 436         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 437         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 438         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 439         else {
 440             /* no chance for this in IsoLat1 */
 441             *outlen = out - outstart;
 442             *inlen = processed - instart;
 443             return(-2);
 444         }
 445
 446         if (inend - in < trailing) {
 447             break;
 448         }
 449
 450         for ( ; trailing; trailing--) {
 451             if (in >= inend)
 452                 break;
 453             if (((d= *in++) & 0xC0) != 0x80) {
 454                 *outlen = out - outstart;
 455                 *inlen = processed - instart;
 456                 return(-2);
 457             }
 458             c <<= 6;
 459             c |= d & 0x3F;
 460         }
 461
 462         /* assertion: c is a single UTF-4 value */
 463         if (c <= 0xFF) {
 464             if (out >= outend)
 465                 break;
 466             *out++ = c;
 467         } else {
 468             /* no chance for this in IsoLat1 */
 469             *outlen = out - outstart;
 470             *inlen = processed - instart;
 471             return(-2);
 472         }
 473         processed = in;
 474     }
 475     *outlen = out - outstart;
 476     *inlen = processed - instart;
 477     return(*outlen);
 478 }
 479 #endif /* LIBXML_OUTPUT_ENABLED */
 480
 481 /**
 482  * UTF16LEToUTF8:
 483  * @out:  a pointer to an array of bytes to store the result
 484  * @outlen:  the length of @out
 485  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
 486  * @inlenb:  the length of @in in UTF-16LE chars
 487  *
 488  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
 489  * block of chars out. This function assumes the endian property
 490  * is the same between the native type of this machine and the
 491  * inputed one.
 492  *
 493  * Returns the number of bytes written, or -1 if lack of space, or -2
 494  *     if the transcoding fails (if *in is not a valid utf16 string)
 495  *     The value of *inlen after return is the number of octets consumed
 496  *     if the return value is positive, else unpredictable.
 497  */
 498 static int
 499 UTF16LEToUTF8(unsigned char* out, int *outlen,
 500             const unsigned char* inb, int *inlenb)
 501 {
 502     unsigned char* outstart = out;
 503     const unsigned char* processed = inb;
 504     unsigned char* outend;
 505     unsigned short* in = (unsigned short*) inb;
 506     unsigned short* inend;
 507     unsigned int c, d, inlen;
 508     unsigned char *tmp;
 509     int bits;
 510
 511     if (*outlen == 0) {
 512         *inlenb = 0;
 513         return(0);
 514     }
 515     outend = out + *outlen;
 516     if ((*inlenb % 2) == 1)
 517         (*inlenb)--;
 518     inlen = *inlenb / 2;
 519     inend = in + inlen;
 520     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 521         if (xmlLittleEndian) {
 522             c= *in++;
 523         } else {
 524             tmp = (unsigned char *) in;
 525             c = *tmp++;
 526             c = c | (((unsigned int)*tmp) << 8);
 527             in++;
 528         }
 529         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 530             if (in >= inend) {           /* handle split mutli-byte characters */
 531                 break;
 532             }
 533             if (xmlLittleEndian) {
 534                 d = *in++;
 535             } else {
 536                 tmp = (unsigned char *) in;
 537                 d = *tmp++;
 538                 d = d | (((unsigned int)*tmp) << 8);
 539                 in++;
 540             }
 541             if ((d & 0xFC00) == 0xDC00) {
 542                 c &= 0x03FF;
 543                 c <<= 10;
 544                 c |= d & 0x03FF;
 545                 c += 0x10000;
 546             }
 547             else {
 548                 *outlen = out - outstart;
 549                 *inlenb = processed - inb;
 550                 return(-2);
 551             }
 552         }
 553
 554         /* assertion: c is a single UTF-4 value */
 555         if (out >= outend)
 556             break;
 557         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 558         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 559         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 560         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 561
 562         for ( ; bits >= 0; bits-= 6) {
 563             if (out >= outend)
 564                 break;
 565             *out++= ((c >> bits) & 0x3F) | 0x80;
 566         }
 567         processed = (const unsigned char*) in;
 568     }
 569     *outlen = out - outstart;
 570     *inlenb = processed - inb;
 571     return(*outlen);
 572 }
 573
 574 #ifdef LIBXML_OUTPUT_ENABLED
 575 /**
 576  * UTF8ToUTF16LE:
 577  * @outb:  a pointer to an array of bytes to store the result
 578  * @outlen:  the length of @outb
 579  * @in:  a pointer to an array of UTF-8 chars
 580  * @inlen:  the length of @in
 581  *
 582  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 583  * block of chars out.
 584  *
 585  * Returns the number of bytes written, or -1 if lack of space, or -2
 586  *     if the transcoding failed.
 587  */
 588 static int
 589 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
 590             const unsigned char* in, int *inlen)
 591 {
 592     unsigned short* out = (unsigned short*) outb;
 593     const unsigned char* processed = in;
 594     const unsigned char *const instart = in;
 595     unsigned short* outstart= out;
 596     unsigned short* outend;
 597     const unsigned char* inend;
 598     unsigned int c, d;
 599     int trailing;
 600     unsigned char *tmp;
 601     unsigned short tmp1, tmp2;
 602
 603     /* UTF16LE encoding has no BOM */
 604     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 605     if (in == NULL) {
 606         *outlen = 0;
 607         *inlen = 0;
 608         return(0);
 609     }
 610     inend= in + *inlen;
 611     outend = out + (*outlen / 2);
 612     while (in < inend) {
 613       d= *in++;
 614       if      (d < 0x80)  { c= d; trailing= 0; }
 615       else if (d < 0xC0) {
 616           /* trailing byte in leading position */
 617           *outlen = (out - outstart) * 2;
 618           *inlen = processed - instart;
 619           return(-2);
 620       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 621       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 622       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 623       else {
 624         /* no chance for this in UTF-16 */
 625         *outlen = (out - outstart) * 2;
 626         *inlen = processed - instart;
 627         return(-2);
 628       }
 629
 630       if (inend - in < trailing) {
 631           break;
 632       }
 633
 634       for ( ; trailing; trailing--) {
 635           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 636               break;
 637           c <<= 6;
 638           c |= d & 0x3F;
 639       }
 640
 641       /* assertion: c is a single UTF-4 value */
 642         if (c < 0x10000) {
 643             if (out >= outend)
 644                 break;
 645             if (xmlLittleEndian) {
 646                 *out++ = c;
 647             } else {
 648                 tmp = (unsigned char *) out;
 649                 *tmp = c ;
 650                 *(tmp + 1) = c >> 8 ;
 651                 out++;
 652             }
 653         }
 654         else if (c < 0x110000) {
 655             if (out+1 >= outend)
 656                 break;
 657             c -= 0x10000;
 658             if (xmlLittleEndian) {
 659                 *out++ = 0xD800 | (c >> 10);
 660                 *out++ = 0xDC00 | (c & 0x03FF);
 661             } else {
 662                 tmp1 = 0xD800 | (c >> 10);
 663                 tmp = (unsigned char *) out;
 664                 *tmp = (unsigned char) tmp1;
 665                 *(tmp + 1) = tmp1 >> 8;
 666                 out++;
 667
 668                 tmp2 = 0xDC00 | (c & 0x03FF);
 669                 tmp = (unsigned char *) out;
 670                 *tmp  = (unsigned char) tmp2;
 671                 *(tmp + 1) = tmp2 >> 8;
 672                 out++;
 673             }
 674         }
 675         else
 676             break;
 677         processed = in;
 678     }
 679     *outlen = (out - outstart) * 2;
 680     *inlen = processed - instart;
 681     return(*outlen);
 682 }
 683
 684 /**
 685  * UTF8ToUTF16:
 686  * @outb:  a pointer to an array of bytes to store the result
 687  * @outlen:  the length of @outb
 688  * @in:  a pointer to an array of UTF-8 chars
 689  * @inlen:  the length of @in
 690  *
 691  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 692  * block of chars out.
 693  *
 694  * Returns the number of bytes written, or -1 if lack of space, or -2
 695  *     if the transcoding failed.
 696  */
 697 static int
 698 UTF8ToUTF16(unsigned char* outb, int *outlen,
 699             const unsigned char* in, int *inlen)
 700 {
 701     if (in == NULL) {
 702         /*
 703          * initialization, add the Byte Order Mark for UTF-16LE
 704          */
 705         if (*outlen >= 2) {
 706             outb[0] = 0xFF;
 707             outb[1] = 0xFE;
 708             *outlen = 2;
 709             *inlen = 0;
 710 #ifdef DEBUG_ENCODING
 711             xmlGenericError(xmlGenericErrorContext,
 712                     "Added FFFE Byte Order Mark\n");
 713 #endif
 714             return(2);
 715         }
 716         *outlen = 0;
 717         *inlen = 0;
 718         return(0);
 719     }
 720     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
 721 }
 722 #endif /* LIBXML_OUTPUT_ENABLED */
 723
 724 /**
 725  * UTF16BEToUTF8:
 726  * @out:  a pointer to an array of bytes to store the result
 727  * @outlen:  the length of @out
 728  * @inb:  a pointer to an array of UTF-16 passed as a byte array
 729  * @inlenb:  the length of @in in UTF-16 chars
 730  *
 731  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 732  * block of chars out. This function assumes the endian property
 733  * is the same between the native type of this machine and the
 734  * inputed one.
 735  *
 736  * Returns the number of bytes written, or -1 if lack of space, or -2
 737  *     if the transcoding fails (if *in is not a valid utf16 string)
 738  * The value of *inlen after return is the number of octets consumed
 739  *     if the return value is positive, else unpredictable.
 740  */
 741 static int
 742 UTF16BEToUTF8(unsigned char* out, int *outlen,
 743             const unsigned char* inb, int *inlenb)
 744 {
 745     unsigned char* outstart = out;
 746     const unsigned char* processed = inb;
 747     unsigned char* outend;
 748     unsigned short* in = (unsigned short*) inb;
 749     unsigned short* inend;
 750     unsigned int c, d, inlen;
 751     unsigned char *tmp;
 752     int bits;
 753
 754     if (*outlen == 0) {
 755         *inlenb = 0;
 756         return(0);
 757     }
 758     outend = out + *outlen;
 759     if ((*inlenb % 2) == 1)
 760         (*inlenb)--;
 761     inlen = *inlenb / 2;
 762     inend= in + inlen;
 763     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 764         if (xmlLittleEndian) {
 765             tmp = (unsigned char *) in;
 766             c = *tmp++;
 767             c = (c << 8) | (unsigned int) *tmp;
 768             in++;
 769         } else {
 770             c= *in++;
 771         }
 772         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 773             if (in >= inend) {           /* handle split mutli-byte characters */
 774                 break;
 775             }
 776             if (xmlLittleEndian) {
 777                 tmp = (unsigned char *) in;
 778                 d = *tmp++;
 779                 d = (d << 8) | (unsigned int) *tmp;
 780                 in++;
 781             } else {
 782                 d= *in++;
 783             }
 784             if ((d & 0xFC00) == 0xDC00) {
 785                 c &= 0x03FF;
 786                 c <<= 10;
 787                 c |= d & 0x03FF;
 788                 c += 0x10000;
 789             }
 790             else {
 791                 *outlen = out - outstart;
 792                 *inlenb = processed - inb;
 793                 return(-2);
 794             }
 795         }
 796
 797         /* assertion: c is a single UTF-4 value */
 798         if (out >= outend)
 799             break;
 800         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 801         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 802         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 803         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 804
 805         for ( ; bits >= 0; bits-= 6) {
 806             if (out >= outend)
 807                 break;
 808             *out++= ((c >> bits) & 0x3F) | 0x80;
 809         }
 810         processed = (const unsigned char*) in;
 811     }
 812     *outlen = out - outstart;
 813     *inlenb = processed - inb;
 814     return(*outlen);
 815 }
 816
 817 #ifdef LIBXML_OUTPUT_ENABLED
 818 /**
 819  * UTF8ToUTF16BE:
 820  * @outb:  a pointer to an array of bytes to store the result
 821  * @outlen:  the length of @outb
 822  * @in:  a pointer to an array of UTF-8 chars
 823  * @inlen:  the length of @in
 824  *
 825  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 826  * block of chars out.
 827  *
 828  * Returns the number of byte written, or -1 by lack of space, or -2
 829  *     if the transcoding failed.
 830  */
 831 static int
 832 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
 833             const unsigned char* in, int *inlen)
 834 {
 835     unsigned short* out = (unsigned short*) outb;
 836     const unsigned char* processed = in;
 837     const unsigned char *const instart = in;
 838     unsigned short* outstart= out;
 839     unsigned short* outend;
 840     const unsigned char* inend;
 841     unsigned int c, d;
 842     int trailing;
 843     unsigned char *tmp;
 844     unsigned short tmp1, tmp2;
 845
 846     /* UTF-16BE has no BOM */
 847     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 848     if (in == NULL) {
 849         *outlen = 0;
 850         *inlen = 0;
 851         return(0);
 852     }
 853     inend= in + *inlen;
 854     outend = out + (*outlen / 2);
 855     while (in < inend) {
 856       d= *in++;
 857       if      (d < 0x80)  { c= d; trailing= 0; }
 858       else if (d < 0xC0)  {
 859           /* trailing byte in leading position */
 860           *outlen = out - outstart;
 861           *inlen = processed - instart;
 862           return(-2);
 863       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 864       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 865       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 866       else {
 867           /* no chance for this in UTF-16 */
 868           *outlen = out - outstart;
 869           *inlen = processed - instart;
 870           return(-2);
 871       }
 872
 873       if (inend - in < trailing) {
 874           break;
 875       }
 876
 877       for ( ; trailing; trailing--) {
 878           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
 879           c <<= 6;
 880           c |= d & 0x3F;
 881       }
 882
 883       /* assertion: c is a single UTF-4 value */
 884         if (c < 0x10000) {
 885             if (out >= outend)  break;
 886             if (xmlLittleEndian) {
 887                 tmp = (unsigned char *) out;
 888                 *tmp = c >> 8;
 889                 *(tmp + 1) = c;
 890                 out++;
 891             } else {
 892                 *out++ = c;
 893             }
 894         }
 895         else if (c < 0x110000) {
 896             if (out+1 >= outend)  break;
 897             c -= 0x10000;
 898             if (xmlLittleEndian) {
 899                 tmp1 = 0xD800 | (c >> 10);
 900                 tmp = (unsigned char *) out;
 901                 *tmp = tmp1 >> 8;
 902                 *(tmp + 1) = (unsigned char) tmp1;
 903                 out++;
 904
 905                 tmp2 = 0xDC00 | (c & 0x03FF);
 906                 tmp = (unsigned char *) out;
 907                 *tmp = tmp2 >> 8;
 908                 *(tmp + 1) = (unsigned char) tmp2;
 909                 out++;
 910             } else {
 911                 *out++ = 0xD800 | (c >> 10);
 912                 *out++ = 0xDC00 | (c & 0x03FF);
 913             }
 914         }
 915         else
 916             break;
 917         processed = in;
 918     }
 919     *outlen = (out - outstart) * 2;
 920     *inlen = processed - instart;
 921     return(*outlen);
 922 }
 923 #endif /* LIBXML_OUTPUT_ENABLED */
 924
 925 /************************************************************************
 926  *                                                                      *
 927  *              Generic encoding handling routines                      *
 928  *                                                                      *
 929  ************************************************************************/
 930
 931 /**
 932  * xmlDetectCharEncoding:
 933  * @in:  a pointer to the first bytes of the XML entity, must be at least
 934  *       2 bytes long (at least 4 if encoding is UTF4 variant).
 935  * @len:  pointer to the length of the buffer
 936  *
 937  * Guess the encoding of the entity using the first bytes of the entity content
 938  * according to the non-normative appendix F of the XML-1.0 recommendation.
 939  *
 940  * Returns one of the XML_CHAR_ENCODING_... values.
 941  */
 942 xmlCharEncoding
 943 xmlDetectCharEncoding(const unsigned char* in, int len)
 944 {
 945     if (in == NULL)
 946         return(XML_CHAR_ENCODING_NONE);
 947     if (len >= 4) {
 948         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 949             (in[2] == 0x00) && (in[3] == 0x3C))
 950             return(XML_CHAR_ENCODING_UCS4BE);
 951         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 952             (in[2] == 0x00) && (in[3] == 0x00))
 953             return(XML_CHAR_ENCODING_UCS4LE);
 954         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 955             (in[2] == 0x3C) && (in[3] == 0x00))
 956             return(XML_CHAR_ENCODING_UCS4_2143);
 957         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 958             (in[2] == 0x00) && (in[3] == 0x00))
 959             return(XML_CHAR_ENCODING_UCS4_3412);
 960         if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
 961             (in[2] == 0xA7) && (in[3] == 0x94))
 962             return(XML_CHAR_ENCODING_EBCDIC);
 963         if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
 964             (in[2] == 0x78) && (in[3] == 0x6D))
 965             return(XML_CHAR_ENCODING_UTF8);
 966         /*
 967          * Although not part of the recommendation, we also
 968          * attempt an "auto-recognition" of UTF-16LE and
 969          * UTF-16BE encodings.
 970          */
 971         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 972             (in[2] == 0x3F) && (in[3] == 0x00))
 973             return(XML_CHAR_ENCODING_UTF16LE);
 974         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 975             (in[2] == 0x00) && (in[3] == 0x3F))
 976             return(XML_CHAR_ENCODING_UTF16BE);
 977     }
 978     if (len >= 3) {
 979         /*
 980          * Errata on XML-1.0 June 20 2001
 981          * We now allow an UTF8 encoded BOM
 982          */
 983         if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
 984             (in[2] == 0xBF))
 985             return(XML_CHAR_ENCODING_UTF8);
 986     }
 987     /* For UTF-16 we can recognize by the BOM */
 988     if (len >= 2) {
 989         if ((in[0] == 0xFE) && (in[1] == 0xFF))
 990             return(XML_CHAR_ENCODING_UTF16BE);
 991         if ((in[0] == 0xFF) && (in[1] == 0xFE))
 992             return(XML_CHAR_ENCODING_UTF16LE);
 993     }
 994     return(XML_CHAR_ENCODING_NONE);
 995 }
 996
 997 /**
 998  * xmlCleanupEncodingAliases:
 999  *
1000  * Unregisters all aliases
1001  */
1002 void
1003 xmlCleanupEncodingAliases(void) {
1004     int i;
1005
1006     if (xmlCharEncodingAliases == NULL)
1007         return;
1008
1009     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1010         if (xmlCharEncodingAliases[i].name != NULL)
1011             xmlFree((char *) xmlCharEncodingAliases[i].name);
1012         if (xmlCharEncodingAliases[i].alias != NULL)
1013             xmlFree((char *) xmlCharEncodingAliases[i].alias);
1014     }
1015     xmlCharEncodingAliasesNb = 0;
1016     xmlCharEncodingAliasesMax = 0;
1017     xmlFree(xmlCharEncodingAliases);
1018     xmlCharEncodingAliases = NULL;
1019 }
1020
1021 /**
1022  * xmlGetEncodingAlias:
1023  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1024  *
1025  * Lookup an encoding name for the given alias.
1026  *
1027  * Returns NULL if not found, otherwise the original name
1028  */
1029 const char *
1030 xmlGetEncodingAlias(const char *alias) {
1031     int i;
1032     char upper[100];
1033
1034     if (alias == NULL)
1035         return(NULL);
1036
1037     if (xmlCharEncodingAliases == NULL)
1038         return(NULL);
1039
1040     for (i = 0;i < 99;i++) {
1041         upper[i] = toupper(alias[i]);
1042         if (upper[i] == 0) break;
1043     }
1044     upper[i] = 0;
1045
1046     /*
1047      * Walk down the list looking for a definition of the alias
1048      */
1049     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1050         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1051             return(xmlCharEncodingAliases[i].name);
1052         }
1053     }
1054     return(NULL);
1055 }
1056
1057 /**
1058  * xmlAddEncodingAlias:
1059  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1060  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1061  *
1062  * Registers an alias @alias for an encoding named @name. Existing alias
1063  * will be overwritten.
1064  *
1065  * Returns 0 in case of success, -1 in case of error
1066  */
1067 int
1068 xmlAddEncodingAlias(const char *name, const char *alias) {
1069     int i;
1070     char upper[100];
1071
1072     if ((name == NULL) || (alias == NULL))
1073         return(-1);
1074
1075     for (i = 0;i < 99;i++) {
1076         upper[i] = toupper(alias[i]);
1077         if (upper[i] == 0) break;
1078     }
1079     upper[i] = 0;
1080
1081     if (xmlCharEncodingAliases == NULL) {
1082         xmlCharEncodingAliasesNb = 0;
1083         xmlCharEncodingAliasesMax = 20;
1084         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1085               xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1086         if (xmlCharEncodingAliases == NULL)
1087             return(-1);
1088     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1089         xmlCharEncodingAliasesMax *= 2;
1090         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1091               xmlRealloc(xmlCharEncodingAliases,
1092                          xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1093     }
1094     /*
1095      * Walk down the list looking for a definition of the alias
1096      */
1097     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1098         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1099             /*
1100              * Replace the definition.
1101              */
1102             xmlFree((char *) xmlCharEncodingAliases[i].name);
1103             xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1104             return(0);
1105         }
1106     }
1107     /*
1108      * Add the definition
1109      */
1110     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1111     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1112     xmlCharEncodingAliasesNb++;
1113     return(0);
1114 }
1115
1116 /**
1117  * xmlDelEncodingAlias:
1118  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1119  *
1120  * Unregisters an encoding alias @alias
1121  *
1122  * Returns 0 in case of success, -1 in case of error
1123  */
1124 int
1125 xmlDelEncodingAlias(const char *alias) {
1126     int i;
1127
1128     if (alias == NULL)
1129         return(-1);
1130
1131     if (xmlCharEncodingAliases == NULL)
1132         return(-1);
1133     /*
1134      * Walk down the list looking for a definition of the alias
1135      */
1136     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1137         if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1138             xmlFree((char *) xmlCharEncodingAliases[i].name);
1139             xmlFree((char *) xmlCharEncodingAliases[i].alias);
1140             xmlCharEncodingAliasesNb--;
1141             memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1142                     sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1143             return(0);
1144         }
1145     }
1146     return(-1);
1147 }
1148
1149 /**
1150  * xmlParseCharEncoding:
1151  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1152  *
1153  * Compare the string to the encoding schemes already known. Note
1154  * that the comparison is case insensitive accordingly to the section
1155  * [XML] 4.3.3 Character Encoding in Entities.
1156  *
1157  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1158  * if not recognized.
1159  */
1160 xmlCharEncoding
1161 xmlParseCharEncoding(const char* name)
1162 {
1163     const char *alias;
1164     char upper[500];
1165     int i;
1166
1167     if (name == NULL)
1168         return(XML_CHAR_ENCODING_NONE);
1169
1170     /*
1171      * Do the alias resolution
1172      */
1173     alias = xmlGetEncodingAlias(name);
1174     if (alias != NULL)
1175         name = alias;
1176
1177     for (i = 0;i < 499;i++) {
1178         upper[i] = toupper(name[i]);
1179         if (upper[i] == 0) break;
1180     }
1181     upper[i] = 0;
1182
1183     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1184     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1185     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1186
1187     /*
1188      * NOTE: if we were able to parse this, the endianness of UTF16 is
1189      *       already found and in use
1190      */
1191     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1192     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1193
1194     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1196     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1197
1198     /*
1199      * NOTE: if we were able to parse this, the endianness of UCS4 is
1200      *       already found and in use
1201      */
1202     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1204     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1205
1206
1207     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1208     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1209     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1210
1211     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1212     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1213     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1214
1215     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1216     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1217     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1218     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1219     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1220     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1221     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1222
1223     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1224     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1225     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1226
1227 #ifdef DEBUG_ENCODING
1228     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1229 #endif
1230     return(XML_CHAR_ENCODING_ERROR);
1231 }
1232
1233 /**
1234  * xmlGetCharEncodingName:
1235  * @enc:  the encoding
1236  *
1237  * The "canonical" name for XML encoding.
1238  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1239  * Section 4.3.3  Character Encoding in Entities
1240  *
1241  * Returns the canonical name for the given encoding
1242  */
1243
1244 const char*
1245 xmlGetCharEncodingName(xmlCharEncoding enc) {
1246     switch (enc) {
1247         case XML_CHAR_ENCODING_ERROR:
1248             return(NULL);
1249         case XML_CHAR_ENCODING_NONE:
1250             return(NULL);
1251         case XML_CHAR_ENCODING_UTF8:
1252             return("UTF-8");
1253         case XML_CHAR_ENCODING_UTF16LE:
1254             return("UTF-16");
1255         case XML_CHAR_ENCODING_UTF16BE:
1256             return("UTF-16");
1257         case XML_CHAR_ENCODING_EBCDIC:
1258             return("EBCDIC");
1259         case XML_CHAR_ENCODING_UCS4LE:
1260             return("ISO-10646-UCS-4");
1261         case XML_CHAR_ENCODING_UCS4BE:
1262             return("ISO-10646-UCS-4");
1263         case XML_CHAR_ENCODING_UCS4_2143:
1264             return("ISO-10646-UCS-4");
1265         case XML_CHAR_ENCODING_UCS4_3412:
1266             return("ISO-10646-UCS-4");
1267         case XML_CHAR_ENCODING_UCS2:
1268             return("ISO-10646-UCS-2");
1269         case XML_CHAR_ENCODING_8859_1:
1270             return("ISO-8859-1");
1271         case XML_CHAR_ENCODING_8859_2:
1272             return("ISO-8859-2");
1273         case XML_CHAR_ENCODING_8859_3:
1274             return("ISO-8859-3");
1275         case XML_CHAR_ENCODING_8859_4:
1276             return("ISO-8859-4");
1277         case XML_CHAR_ENCODING_8859_5:
1278             return("ISO-8859-5");
1279         case XML_CHAR_ENCODING_8859_6:
1280             return("ISO-8859-6");
1281         case XML_CHAR_ENCODING_8859_7:
1282             return("ISO-8859-7");
1283         case XML_CHAR_ENCODING_8859_8:
1284             return("ISO-8859-8");
1285         case XML_CHAR_ENCODING_8859_9:
1286             return("ISO-8859-9");
1287         case XML_CHAR_ENCODING_2022_JP:
1288             return("ISO-2022-JP");
1289         case XML_CHAR_ENCODING_SHIFT_JIS:
1290             return("Shift-JIS");
1291         case XML_CHAR_ENCODING_EUC_JP:
1292             return("EUC-JP");
1293         case XML_CHAR_ENCODING_ASCII:
1294             return(NULL);
1295     }
1296     return(NULL);
1297 }
1298
1299 /************************************************************************
1300  *                                                                      *
1301  *                      Char encoding handlers                          *
1302  *                                                                      *
1303  ************************************************************************/
1304
1305
/*
 * Registry of encoding handlers.
 *
 * MAX_ENCODING_HANDLERS is the fixed number of slots allocated for the
 * "handlers" array by xmlInitCharEncodingHandlers(); registration is
 * refused once nbCharEncodingHandler reaches it.
 * The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
/* array of registered handlers, NULL until the registry is initialized */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* number of slots of "handlers" currently in use */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the value xmlFindCharEncodingHandler() returns for a NULL or
 * empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1317
1318 /**
1319  * xmlNewCharEncodingHandler:
1320  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1321  * @input:  the xmlCharEncodingInputFunc to read that encoding
1322  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1323  *
1324  * Create and registers an xmlCharEncodingHandler.
1325  *
1326  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1327  */
1328 xmlCharEncodingHandlerPtr
1329 xmlNewCharEncodingHandler(const char *name,
1330                           xmlCharEncodingInputFunc input,
1331                           xmlCharEncodingOutputFunc output) {
1332     xmlCharEncodingHandlerPtr handler;
1333     const char *alias;
1334     char upper[500];
1335     int i;
1336     char *up = NULL;
1337
1338     /*
1339      * Do the alias resolution
1340      */
1341     alias = xmlGetEncodingAlias(name);
1342     if (alias != NULL)
1343         name = alias;
1344
1345     /*
1346      * Keep only the uppercase version of the encoding.
1347      */
1348     if (name == NULL) {
1349         xmlEncodingErr(XML_I18N_NO_NAME,
1350                        "xmlNewCharEncodingHandler : no name !\n", NULL);
1351         return(NULL);
1352     }
1353     for (i = 0;i < 499;i++) {
1354         upper[i] = toupper(name[i]);
1355         if (upper[i] == 0) break;
1356     }
1357     upper[i] = 0;
1358     up = xmlMemStrdup(upper);
1359     if (up == NULL) {
1360         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361         return(NULL);
1362     }
1363
1364     /*
1365      * allocate and fill-up an handler block.
1366      */
1367     handler = (xmlCharEncodingHandlerPtr)
1368               xmlMalloc(sizeof(xmlCharEncodingHandler));
1369     if (handler == NULL) {
1370         xmlFree(up);
1371         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1372         return(NULL);
1373     }
1374     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1375     handler->input = input;
1376     handler->output = output;
1377     handler->name = up;
1378
1379 #ifdef LIBXML_ICONV_ENABLED
1380     handler->iconv_in = NULL;
1381     handler->iconv_out = NULL;
1382 #endif
1383 #ifdef LIBXML_ICU_ENABLED
1384     handler->uconv_in = NULL;
1385     handler->uconv_out = NULL;
1386 #endif
1387
1388     /*
1389      * registers and returns the handler.
1390      */
1391     xmlRegisterCharEncodingHandler(handler);
1392 #ifdef DEBUG_ENCODING
1393     xmlGenericError(xmlGenericErrorContext,
1394             "Registered encoding handler for %s\n", name);
1395 #endif
1396     return(handler);
1397 }
1398
1399 /**
1400  * xmlInitCharEncodingHandlers:
1401  *
1402  * Initialize the char encoding support, it registers the default
1403  * encoding supported.
1404  * NOTE: while public, this function usually doesn't need to be called
1405  *       in normal processing.
1406  */
1407 void
1408 xmlInitCharEncodingHandlers(void) {
1409     unsigned short int tst = 0x1234;
1410     unsigned char *ptr = (unsigned char *) &tst;
1411
1412     if (handlers != NULL) return;
1413
1414     handlers = (xmlCharEncodingHandlerPtr *)
1415         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1416
1417     if (*ptr == 0x12) xmlLittleEndian = 0;
1418     else if (*ptr == 0x34) xmlLittleEndian = 1;
1419     else {
1420         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1421                        "Odd problem at endianness detection\n", NULL);
1422     }
1423
1424     if (handlers == NULL) {
1425         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1426         return;
1427     }
1428     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1429 #ifdef LIBXML_OUTPUT_ENABLED
1430     xmlUTF16LEHandler =
1431           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1432     xmlUTF16BEHandler =
1433           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1434     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1435     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1436     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1437     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1438 #ifdef LIBXML_HTML_ENABLED
1439     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1440 #endif
1441 #else
1442     xmlUTF16LEHandler =
1443           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1444     xmlUTF16BEHandler =
1445           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1446     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1447     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1448     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1449     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1450 #endif /* LIBXML_OUTPUT_ENABLED */
1451 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1452 #ifdef LIBXML_ISO8859X_ENABLED
1453     xmlRegisterCharEncodingHandlersISO8859x ();
1454 #endif
1455 #endif
1456
1457 }
1458
1459 /**
1460  * xmlCleanupCharEncodingHandlers:
1461  *
1462  * Cleanup the memory allocated for the char encoding support, it
1463  * unregisters all the encoding handlers and the aliases.
1464  */
1465 void
1466 xmlCleanupCharEncodingHandlers(void) {
1467     xmlCleanupEncodingAliases();
1468
1469     if (handlers == NULL) return;
1470
1471     for (;nbCharEncodingHandler > 0;) {
1472         nbCharEncodingHandler--;
1473         if (handlers[nbCharEncodingHandler] != NULL) {
1474             if (handlers[nbCharEncodingHandler]->name != NULL)
1475                 xmlFree(handlers[nbCharEncodingHandler]->name);
1476             xmlFree(handlers[nbCharEncodingHandler]);
1477         }
1478     }
1479     xmlFree(handlers);
1480     handlers = NULL;
1481     nbCharEncodingHandler = 0;
1482     xmlDefaultCharEncodingHandler = NULL;
1483 }
1484
1485 /**
1486  * xmlRegisterCharEncodingHandler:
1487  * @handler:  the xmlCharEncodingHandlerPtr handler block
1488  *
1489  * Register the char encoding handler, surprising, isn't it ?
1490  */
1491 void
1492 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1493     if (handlers == NULL) xmlInitCharEncodingHandlers();
1494     if ((handler == NULL) || (handlers == NULL)) {
1495         xmlEncodingErr(XML_I18N_NO_HANDLER,
1496                 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1497         goto free_handler;
1498     }
1499
1500     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1501         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1502         "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1503                        "MAX_ENCODING_HANDLERS");
1504         goto free_handler;
1505     }
1506     handlers[nbCharEncodingHandler++] = handler;
1507     return;
1508
1509 free_handler:
1510     if (handler != NULL) {
1511         if (handler->name != NULL) {
1512             xmlFree(handler->name);
1513         }
1514         xmlFree(handler);
1515     }
1516 }
1517
1518 /**
1519  * xmlGetCharEncodingHandler:
1520  * @enc:  an xmlCharEncoding value.
1521  *
1522  * Search in the registered set the handler able to read/write that encoding.
1523  *
1524  * Returns the handler or NULL if not found
1525  */
1526 xmlCharEncodingHandlerPtr
1527 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1528     xmlCharEncodingHandlerPtr handler;
1529
1530     if (handlers == NULL) xmlInitCharEncodingHandlers();
1531     switch (enc) {
1532         case XML_CHAR_ENCODING_ERROR:
1533             return(NULL);
1534         case XML_CHAR_ENCODING_NONE:
1535             return(NULL);
1536         case XML_CHAR_ENCODING_UTF8:
1537             return(NULL);
1538         case XML_CHAR_ENCODING_UTF16LE:
1539             return(xmlUTF16LEHandler);
1540         case XML_CHAR_ENCODING_UTF16BE:
1541             return(xmlUTF16BEHandler);
1542         case XML_CHAR_ENCODING_EBCDIC:
1543             handler = xmlFindCharEncodingHandler("EBCDIC");
1544             if (handler != NULL) return(handler);
1545             handler = xmlFindCharEncodingHandler("ebcdic");
1546             if (handler != NULL) return(handler);
1547             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1548             if (handler != NULL) return(handler);
1549             handler = xmlFindCharEncodingHandler("IBM-037");
1550             if (handler != NULL) return(handler);
1551             break;
1552         case XML_CHAR_ENCODING_UCS4BE:
1553             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1554             if (handler != NULL) return(handler);
1555             handler = xmlFindCharEncodingHandler("UCS-4");
1556             if (handler != NULL) return(handler);
1557             handler = xmlFindCharEncodingHandler("UCS4");
1558             if (handler != NULL) return(handler);
1559             break;
1560         case XML_CHAR_ENCODING_UCS4LE:
1561             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1562             if (handler != NULL) return(handler);
1563             handler = xmlFindCharEncodingHandler("UCS-4");
1564             if (handler != NULL) return(handler);
1565             handler = xmlFindCharEncodingHandler("UCS4");
1566             if (handler != NULL) return(handler);
1567             break;
1568         case XML_CHAR_ENCODING_UCS4_2143:
1569             break;
1570         case XML_CHAR_ENCODING_UCS4_3412:
1571             break;
1572         case XML_CHAR_ENCODING_UCS2:
1573             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1574             if (handler != NULL) return(handler);
1575             handler = xmlFindCharEncodingHandler("UCS-2");
1576             if (handler != NULL) return(handler);
1577             handler = xmlFindCharEncodingHandler("UCS2");
1578             if (handler != NULL) return(handler);
1579             break;
1580
1581             /*
1582              * We used to keep ISO Latin encodings native in the
1583              * generated data. This led to so many problems that
1584              * this has been removed. One can still change this
1585              * back by registering no-ops encoders for those
1586              */
1587         case XML_CHAR_ENCODING_8859_1:
1588             handler = xmlFindCharEncodingHandler("ISO-8859-1");
1589             if (handler != NULL) return(handler);
1590             break;
1591         case XML_CHAR_ENCODING_8859_2:
1592             handler = xmlFindCharEncodingHandler("ISO-8859-2");
1593             if (handler != NULL) return(handler);
1594             break;
1595         case XML_CHAR_ENCODING_8859_3:
1596             handler = xmlFindCharEncodingHandler("ISO-8859-3");
1597             if (handler != NULL) return(handler);
1598             break;
1599         case XML_CHAR_ENCODING_8859_4:
1600             handler = xmlFindCharEncodingHandler("ISO-8859-4");
1601             if (handler != NULL) return(handler);
1602             break;
1603         case XML_CHAR_ENCODING_8859_5:
1604             handler = xmlFindCharEncodingHandler("ISO-8859-5");
1605             if (handler != NULL) return(handler);
1606             break;
1607         case XML_CHAR_ENCODING_8859_6:
1608             handler = xmlFindCharEncodingHandler("ISO-8859-6");
1609             if (handler != NULL) return(handler);
1610             break;
1611         case XML_CHAR_ENCODING_8859_7:
1612             handler = xmlFindCharEncodingHandler("ISO-8859-7");
1613             if (handler != NULL) return(handler);
1614             break;
1615         case XML_CHAR_ENCODING_8859_8:
1616             handler = xmlFindCharEncodingHandler("ISO-8859-8");
1617             if (handler != NULL) return(handler);
1618             break;
1619         case XML_CHAR_ENCODING_8859_9:
1620             handler = xmlFindCharEncodingHandler("ISO-8859-9");
1621             if (handler != NULL) return(handler);
1622             break;
1623
1624
1625         case XML_CHAR_ENCODING_2022_JP:
1626             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1627             if (handler != NULL) return(handler);
1628             break;
1629         case XML_CHAR_ENCODING_SHIFT_JIS:
1630             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1631             if (handler != NULL) return(handler);
1632             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1633             if (handler != NULL) return(handler);
1634             handler = xmlFindCharEncodingHandler("Shift_JIS");
1635             if (handler != NULL) return(handler);
1636             break;
1637         case XML_CHAR_ENCODING_EUC_JP:
1638             handler = xmlFindCharEncodingHandler("EUC-JP");
1639             if (handler != NULL) return(handler);
1640             break;
1641         default:
1642             break;
1643     }
1644
1645 #ifdef DEBUG_ENCODING
1646     xmlGenericError(xmlGenericErrorContext,
1647             "No handler found for encoding %d\n", enc);
1648 #endif
1649     return(NULL);
1650 }
1651
1652 /**
1653  * xmlFindCharEncodingHandler:
1654  * @name:  a string describing the char encoding.
1655  *
1656  * Search in the registered set the handler able to read/write that encoding.
1657  *
1658  * Returns the handler or NULL if not found
1659  */
1660 xmlCharEncodingHandlerPtr
1661 xmlFindCharEncodingHandler(const char *name) {
1662     const char *nalias;
1663     const char *norig;
1664     xmlCharEncoding alias;
1665 #ifdef LIBXML_ICONV_ENABLED
1666     xmlCharEncodingHandlerPtr enc;
1667     iconv_t icv_in, icv_out;
1668 #endif /* LIBXML_ICONV_ENABLED */
1669 #ifdef LIBXML_ICU_ENABLED
1670     xmlCharEncodingHandlerPtr encu;
1671     uconv_t *ucv_in, *ucv_out;
1672 #endif /* LIBXML_ICU_ENABLED */
1673     char upper[100];
1674     int i;
1675
1676     if (handlers == NULL) xmlInitCharEncodingHandlers();
1677     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1678     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1679
1680     /*
1681      * Do the alias resolution
1682      */
1683     norig = name;
1684     nalias = xmlGetEncodingAlias(name);
1685     if (nalias != NULL)
1686         name = nalias;
1687
1688     /*
1689      * Check first for directly registered encoding names
1690      */
1691     for (i = 0;i < 99;i++) {
1692         upper[i] = toupper(name[i]);
1693         if (upper[i] == 0) break;
1694     }
1695     upper[i] = 0;
1696
1697     if (handlers != NULL) {
1698         for (i = 0;i < nbCharEncodingHandler; i++) {
1699             if (!strcmp(upper, handlers[i]->name)) {
1700 #ifdef DEBUG_ENCODING
1701                 xmlGenericError(xmlGenericErrorContext,
1702                         "Found registered handler for encoding %s\n", name);
1703 #endif
1704                 return(handlers[i]);
1705             }
1706         }
1707     }
1708
1709 #ifdef LIBXML_ICONV_ENABLED
1710     /* check whether iconv can handle this */
1711     icv_in = iconv_open("UTF-8", name);
1712     icv_out = iconv_open(name, "UTF-8");
1713     if (icv_in == (iconv_t) -1) {
1714         icv_in = iconv_open("UTF-8", upper);
1715     }
1716     if (icv_out == (iconv_t) -1) {
1717         icv_out = iconv_open(upper, "UTF-8");
1718     }
1719     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1720             enc = (xmlCharEncodingHandlerPtr)
1721                   xmlMalloc(sizeof(xmlCharEncodingHandler));
1722             if (enc == NULL) {
1723                 iconv_close(icv_in);
1724                 iconv_close(icv_out);
1725                 return(NULL);
1726             }
1727             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1728             enc->name = xmlMemStrdup(name);
1729             enc->input = NULL;
1730             enc->output = NULL;
1731             enc->iconv_in = icv_in;
1732             enc->iconv_out = icv_out;
1733 #ifdef DEBUG_ENCODING
1734             xmlGenericError(xmlGenericErrorContext,
1735                     "Found iconv handler for encoding %s\n", name);
1736 #endif
1737             return enc;
1738     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1739             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1740                     "iconv : problems with filters for '%s'\n", name);
1741             if (icv_in != (iconv_t) -1)
1742                 iconv_close(icv_in);
1743             else
1744                 iconv_close(icv_out);
1745     }
1746 #endif /* LIBXML_ICONV_ENABLED */
1747 #ifdef LIBXML_ICU_ENABLED
1748     /* check whether icu can handle this */
1749     ucv_in = openIcuConverter(name, 1);
1750     ucv_out = openIcuConverter(name, 0);
1751     if (ucv_in != NULL && ucv_out != NULL) {
1752             encu = (xmlCharEncodingHandlerPtr)
1753                    xmlMalloc(sizeof(xmlCharEncodingHandler));
1754             if (encu == NULL) {
1755                 closeIcuConverter(ucv_in);
1756                 closeIcuConverter(ucv_out);
1757                 return(NULL);
1758             }
1759             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1760             encu->name = xmlMemStrdup(name);
1761             encu->input = NULL;
1762             encu->output = NULL;
1763             encu->uconv_in = ucv_in;
1764             encu->uconv_out = ucv_out;
1765 #ifdef DEBUG_ENCODING
1766             xmlGenericError(xmlGenericErrorContext,
1767                     "Found ICU converter handler for encoding %s\n", name);
1768 #endif
1769             return encu;
1770     } else if (ucv_in != NULL || ucv_out != NULL) {
1771             closeIcuConverter(ucv_in);
1772             closeIcuConverter(ucv_out);
1773             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1774                     "ICU converter : problems with filters for '%s'\n", name);
1775     }
1776 #endif /* LIBXML_ICU_ENABLED */
1777
1778 #ifdef DEBUG_ENCODING
1779     xmlGenericError(xmlGenericErrorContext,
1780             "No handler found for encoding %s\n", name);
1781 #endif
1782
1783     /*
1784      * Fallback using the canonical names
1785      */
1786     alias = xmlParseCharEncoding(norig);
1787     if (alias != XML_CHAR_ENCODING_ERROR) {
1788         const char* canon;
1789         canon = xmlGetCharEncodingName(alias);
1790         if ((canon != NULL) && (strcmp(name, canon))) {
1791             return(xmlFindCharEncodingHandler(canon));
1792         }
1793     }
1794
1795     /* If "none of the above", give up */
1796     return(NULL);
1797 }
1798
1799 /************************************************************************
1800  *                                                                      *
1801  *              ICONV based generic conversion functions                *
1802  *                                                                      *
1803  ************************************************************************/
1804
1805 #ifdef LIBXML_ICONV_ENABLED
1806 /**
1807  * xmlIconvWrapper:
1808  * @cd:         iconv converter data structure
1809  * @out:  a pointer to an array of bytes to store the result
1810  * @outlen:  the length of @out
1811  * @in:  a pointer to an array of input bytes
1812  * @inlen:  the length of @in
1813  *
1814  * Returns 0 if success, or
1815  *     -1 by lack of space, or
1816  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1817  *        the result of transformation can't fit into the encoding we want), or
1818  *     -3 if there the last byte can't form a single output char.
1819  *
1820  * The value of @inlen after return is the number of octets consumed
1821  *     as the return value is positive, else unpredictable.
1822  * The value of @outlen after return is the number of octets produced.
1823  */
1824 static int
1825 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1826                 const unsigned char *in, int *inlen) {
1827     size_t icv_inlen, icv_outlen;
1828     const char *icv_in = (const char *) in;
1829     char *icv_out = (char *) out;
1830     size_t ret;
1831
1832     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1833         if (outlen != NULL) *outlen = 0;
1834         return(-1);
1835     }
1836     icv_inlen = *inlen;
1837     icv_outlen = *outlen;
1838     ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1839     *inlen -= icv_inlen;
1840     *outlen -= icv_outlen;
1841     if ((icv_inlen != 0) || (ret == (size_t) -1)) {
1842 #ifdef EILSEQ
1843         if (errno == EILSEQ) {
1844             return -2;
1845         } else
1846 #endif
1847 #ifdef E2BIG
1848         if (errno == E2BIG) {
1849             return -1;
1850         } else
1851 #endif
1852 #ifdef EINVAL
1853         if (errno == EINVAL) {
1854             return -3;
1855         } else
1856 #endif
1857         {
1858             return -3;
1859         }
1860     }
1861     return 0;
1862 }
1863 #endif /* LIBXML_ICONV_ENABLED */
1864
1865 /************************************************************************
1866  *                                                                      *
1867  *              ICU based generic conversion functions          *
1868  *                                                                      *
1869  ************************************************************************/
1870
1871 #ifdef LIBXML_ICU_ENABLED
1872 /**
1873  * xmlUconvWrapper:
1874  * @cd: ICU uconverter data structure
1875  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1876  * @out:  a pointer to an array of bytes to store the result
1877  * @outlen:  the length of @out
1878  * @in:  a pointer to an array of input bytes
1879  * @inlen:  the length of @in
1880  * @flush: if true, indicates end of input
1881  *
1882  * Returns 0 if success, or
1883  *     -1 by lack of space, or
1884  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1885  *        the result of transformation can't fit into the encoding we want), or
1886  *     -3 if there the last byte can't form a single output char.
1887  *
1888  * The value of @inlen after return is the number of octets consumed
1889  *     as the return value is positive, else unpredictable.
1890  * The value of @outlen after return is the number of octets produced.
1891  */
1892 static int
1893 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1894                 const unsigned char *in, int *inlen, int flush) {
1895     const char *ucv_in = (const char *) in;
1896     char *ucv_out = (char *) out;
1897     UErrorCode err = U_ZERO_ERROR;
1898
1899     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1900         if (outlen != NULL) *outlen = 0;
1901         return(-1);
1902     }
1903
1904     if (toUnicode) {
1905         /* encoding => UTF-16 => UTF-8 */
1906         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1907                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1908                        &cd->pivot_source, &cd->pivot_target,
1909                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1910     } else {
1911         /* UTF-8 => UTF-16 => encoding */
1912         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1913                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1914                        &cd->pivot_source, &cd->pivot_target,
1915                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1916     }
1917     *inlen = ucv_in - (const char*) in;
1918     *outlen = ucv_out - (char *) out;
1919     if (U_SUCCESS(err)) {
1920         /* reset pivot buf if this is the last call for input (flush==TRUE) */
1921         if (flush)
1922             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1923         return 0;
1924     }
1925     if (err == U_BUFFER_OVERFLOW_ERROR)
1926         return -1;
1927     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1928         return -2;
1929     return -3;
1930 }
1931 #endif /* LIBXML_ICU_ENABLED */
1932
1933 /************************************************************************
1934  *                                                                      *
1935  *              The real API used by libxml for on-the-fly conversion   *
1936  *                                                                      *
1937  ************************************************************************/
1938
1939 /**
1940  * xmlEncInputChunk:
1941  * @handler:  encoding handler
1942  * @out:  a pointer to an array of bytes to store the result
1943  * @outlen:  the length of @out
1944  * @in:  a pointer to an array of input bytes
1945  * @inlen:  the length of @in
1946  * @flush:  flush (ICU-related)
1947  *
1948  * Returns 0 if success, or
1949  *     -1 by lack of space, or
1950  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1951  *        the result of transformation can't fit into the encoding we want), or
1952  *     -3 if there the last byte can't form a single output char.
1953  *
1954  * The value of @inlen after return is the number of octets consumed
1955  *     as the return value is 0, else unpredictable.
1956  * The value of @outlen after return is the number of octets produced.
1957  */
1958 static int
1959 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1960                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1961     int ret;
1962     (void)flush;
1963
1964     if (handler->input != NULL) {
1965         ret = handler->input(out, outlen, in, inlen);
1966         if (ret > 0)
1967            ret = 0;
1968     }
1969 #ifdef LIBXML_ICONV_ENABLED
1970     else if (handler->iconv_in != NULL) {
1971         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1972     }
1973 #endif /* LIBXML_ICONV_ENABLED */
1974 #ifdef LIBXML_ICU_ENABLED
1975     else if (handler->uconv_in != NULL) {
1976         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1977                               flush);
1978     }
1979 #endif /* LIBXML_ICU_ENABLED */
1980     else {
1981         *outlen = 0;
1982         *inlen = 0;
1983         ret = -2;
1984     }
1985
1986     return(ret);
1987 }
1988
1989 /**
1990  * xmlEncOutputChunk:
1991  * @handler:  encoding handler
1992  * @out:  a pointer to an array of bytes to store the result
1993  * @outlen:  the length of @out
1994  * @in:  a pointer to an array of input bytes
1995  * @inlen:  the length of @in
1996  *
1997  * Returns 0 if success, or
1998  *     -1 by lack of space, or
1999  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2000  *        the result of transformation can't fit into the encoding we want), or
2001  *     -3 if there the last byte can't form a single output char.
2002  *     -4 if no output function was found.
2003  *
2004  * The value of @inlen after return is the number of octets consumed
2005  *     as the return value is 0, else unpredictable.
2006  * The value of @outlen after return is the number of octets produced.
2007  */
2008 static int
2009 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2010                   int *outlen, const unsigned char *in, int *inlen) {
2011     int ret;
2012
2013     if (handler->output != NULL) {
2014         ret = handler->output(out, outlen, in, inlen);
2015         if (ret > 0)
2016            ret = 0;
2017     }
2018 #ifdef LIBXML_ICONV_ENABLED
2019     else if (handler->iconv_out != NULL) {
2020         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2021     }
2022 #endif /* LIBXML_ICONV_ENABLED */
2023 #ifdef LIBXML_ICU_ENABLED
2024     else if (handler->uconv_out != NULL) {
2025         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2026                               1);
2027     }
2028 #endif /* LIBXML_ICU_ENABLED */
2029     else {
2030         *outlen = 0;
2031         *inlen = 0;
2032         ret = -4;
2033     }
2034
2035     return(ret);
2036 }
2037
2038 /**
2039  * xmlCharEncFirstLineInt:
2040  * @handler:    char encoding transformation data structure
2041  * @out:  an xmlBuffer for the output.
2042  * @in:  an xmlBuffer for the input
2043  * @len:  number of bytes to convert for the first line, or -1
2044  *
2045  * Front-end for the encoding handler input function, but handle only
2046  * the very first line, i.e. limit itself to 45 chars.
2047  *
2048  * Returns the number of byte written if success, or
2049  *     -1 general error
2050  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2051  *        the result of transformation can't fit into the encoding we want), or
2052  */
2053 int
2054 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2055                        xmlBufferPtr in, int len) {
2056     int ret;
2057     int written;
2058     int toconv;
2059
2060     if (handler == NULL) return(-1);
2061     if (out == NULL) return(-1);
2062     if (in == NULL) return(-1);
2063
2064     /* calculate space available */
2065     written = out->size - out->use - 1; /* count '\0' */
2066     toconv = in->use;
2067     /*
2068      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2069      * 45 chars should be sufficient to reach the end of the encoding
2070      * declaration without going too far inside the document content.
2071      * on UTF-16 this means 90bytes, on UCS4 this means 180
2072      * The actual value depending on guessed encoding is passed as @len
2073      * if provided
2074      */
2075     if (len >= 0) {
2076         if (toconv > len)
2077             toconv = len;
2078     } else {
2079         if (toconv > 180)
2080             toconv = 180;
2081     }
2082     if (toconv * 2 >= written) {
2083         xmlBufferGrow(out, toconv * 2);
2084         written = out->size - out->use - 1;
2085     }
2086
2087     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2088                            in->content, &toconv, 0);
2089     xmlBufferShrink(in, toconv);
2090     out->use += written;
2091     out->content[out->use] = 0;
2092     if (ret == -1) ret = -3;
2093
2094 #ifdef DEBUG_ENCODING
2095     switch (ret) {
2096         case 0:
2097             xmlGenericError(xmlGenericErrorContext,
2098                     "converted %d bytes to %d bytes of input\n",
2099                     toconv, written);
2100             break;
2101         case -1:
2102             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2103                     toconv, written, in->use);
2104             break;
2105         case -2:
2106             xmlGenericError(xmlGenericErrorContext,
2107                     "input conversion failed due to input error\n");
2108             break;
2109         case -3:
2110             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2111                     toconv, written, in->use);
2112             break;
2113         default:
2114             xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2115     }
2116 #endif /* DEBUG_ENCODING */
2117     /*
2118      * Ignore when input buffer is not on a boundary
2119      */
2120     if (ret == -3) ret = 0;
2121     if (ret == -1) ret = 0;
2122     return(written ? written : ret);
2123 }
2124
2125 /**
2126  * xmlCharEncFirstLine:
2127  * @handler:    char encoding transformation data structure
2128  * @out:  an xmlBuffer for the output.
2129  * @in:  an xmlBuffer for the input
2130  *
2131  * Front-end for the encoding handler input function, but handle only
2132  * the very first line, i.e. limit itself to 45 chars.
2133  *
2134  * Returns the number of byte written if success, or
2135  *     -1 general error
2136  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2137  *        the result of transformation can't fit into the encoding we want), or
2138  */
2139 int
2140 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2141                  xmlBufferPtr in) {
2142     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2143 }
2144
2145 /**
2146  * xmlCharEncFirstLineInput:
2147  * @input: a parser input buffer
2148  * @len:  number of bytes to convert for the first line, or -1
2149  *
2150  * Front-end for the encoding handler input function, but handle only
2151  * the very first line. Point is that this is based on autodetection
2152  * of the encoding and once that first line is converted we may find
2153  * out that a different decoder is needed to process the input.
2154  *
2155  * Returns the number of byte written if success, or
2156  *     -1 general error
2157  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2158  *        the result of transformation can't fit into the encoding we want), or
2159  */
2160 int
2161 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2162 {
2163     int ret;
2164     size_t written;
2165     size_t toconv;
2166     int c_in;
2167     int c_out;
2168     xmlBufPtr in;
2169     xmlBufPtr out;
2170
2171     if ((input == NULL) || (input->encoder == NULL) ||
2172         (input->buffer == NULL) || (input->raw == NULL))
2173         return (-1);
2174     out = input->buffer;
2175     in = input->raw;
2176
2177     toconv = xmlBufUse(in);
2178     if (toconv == 0)
2179         return (0);
2180     written = xmlBufAvail(out) - 1; /* count '\0' */
2181     /*
2182      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2183      * 45 chars should be sufficient to reach the end of the encoding
2184      * declaration without going too far inside the document content.
2185      * on UTF-16 this means 90bytes, on UCS4 this means 180
2186      * The actual value depending on guessed encoding is passed as @len
2187      * if provided
2188      */
2189     if (len >= 0) {
2190         if (toconv > (unsigned int) len)
2191             toconv = len;
2192     } else {
2193         if (toconv > 180)
2194             toconv = 180;
2195     }
2196     if (toconv * 2 >= written) {
2197         xmlBufGrow(out, toconv * 2);
2198         written = xmlBufAvail(out) - 1;
2199     }
2200     if (written > 360)
2201         written = 360;
2202
2203     c_in = toconv;
2204     c_out = written;
2205     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2206                            xmlBufContent(in), &c_in, 0);
2207     xmlBufShrink(in, c_in);
2208     xmlBufAddLen(out, c_out);
2209     if (ret == -1)
2210         ret = -3;
2211
2212     switch (ret) {
2213         case 0:
2214 #ifdef DEBUG_ENCODING
2215             xmlGenericError(xmlGenericErrorContext,
2216                             "converted %d bytes to %d bytes of input\n",
2217                             c_in, c_out);
2218 #endif
2219             break;
2220         case -1:
2221 #ifdef DEBUG_ENCODING
2222             xmlGenericError(xmlGenericErrorContext,
2223                          "converted %d bytes to %d bytes of input, %d left\n",
2224                             c_in, c_out, (int)xmlBufUse(in));
2225 #endif
2226             break;
2227         case -3:
2228 #ifdef DEBUG_ENCODING
2229             xmlGenericError(xmlGenericErrorContext,
2230                         "converted %d bytes to %d bytes of input, %d left\n",
2231                             c_in, c_out, (int)xmlBufUse(in));
2232 #endif
2233             break;
2234         case -2: {
2235             char buf[50];
2236             const xmlChar *content = xmlBufContent(in);
2237
2238             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2239                      content[0], content[1],
2240                      content[2], content[3]);
2241             buf[49] = 0;
2242             xmlEncodingErr(XML_I18N_CONV_FAILED,
2243                     "input conversion failed due to input error, bytes %s\n",
2244                            buf);
2245         }
2246     }
2247     /*
2248      * Ignore when input buffer is not on a boundary
2249      */
2250     if (ret == -3) ret = 0;
2251     if (ret == -1) ret = 0;
2252     return(c_out ? c_out : ret);
2253 }
2254
2255 /**
2256  * xmlCharEncInput:
2257  * @input: a parser input buffer
2258  * @flush: try to flush all the raw buffer
2259  *
2260  * Generic front-end for the encoding handler on parser input
2261  *
2262  * Returns the number of byte written if success, or
2263  *     -1 general error
2264  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2265  *        the result of transformation can't fit into the encoding we want), or
2266  */
2267 int
2268 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2269 {
2270     int ret;
2271     size_t written;
2272     size_t toconv;
2273     int c_in;
2274     int c_out;
2275     xmlBufPtr in;
2276     xmlBufPtr out;
2277
2278     if ((input == NULL) || (input->encoder == NULL) ||
2279         (input->buffer == NULL) || (input->raw == NULL))
2280         return (-1);
2281     out = input->buffer;
2282     in = input->raw;
2283
2284     toconv = xmlBufUse(in);
2285     if (toconv == 0)
2286         return (0);
2287     if ((toconv > 64 * 1024) && (flush == 0))
2288         toconv = 64 * 1024;
2289     written = xmlBufAvail(out);
2290     if (written > 0)
2291         written--; /* count '\0' */
2292     if (toconv * 2 >= written) {
2293         xmlBufGrow(out, toconv * 2);
2294         written = xmlBufAvail(out);
2295         if (written > 0)
2296             written--; /* count '\0' */
2297     }
2298     if ((written > 128 * 1024) && (flush == 0))
2299         written = 128 * 1024;
2300
2301     c_in = toconv;
2302     c_out = written;
2303     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2304                            xmlBufContent(in), &c_in, flush);
2305     xmlBufShrink(in, c_in);
2306     xmlBufAddLen(out, c_out);
2307     if (ret == -1)
2308         ret = -3;
2309
2310     switch (ret) {
2311         case 0:
2312 #ifdef DEBUG_ENCODING
2313             xmlGenericError(xmlGenericErrorContext,
2314                             "converted %d bytes to %d bytes of input\n",
2315                             c_in, c_out);
2316 #endif
2317             break;
2318         case -1:
2319 #ifdef DEBUG_ENCODING
2320             xmlGenericError(xmlGenericErrorContext,
2321                          "converted %d bytes to %d bytes of input, %d left\n",
2322                             c_in, c_out, (int)xmlBufUse(in));
2323 #endif
2324             break;
2325         case -3:
2326 #ifdef DEBUG_ENCODING
2327             xmlGenericError(xmlGenericErrorContext,
2328                         "converted %d bytes to %d bytes of input, %d left\n",
2329                             c_in, c_out, (int)xmlBufUse(in));
2330 #endif
2331             break;
2332         case -2: {
2333             char buf[50];
2334             const xmlChar *content = xmlBufContent(in);
2335
2336             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2337                      content[0], content[1],
2338                      content[2], content[3]);
2339             buf[49] = 0;
2340             xmlEncodingErr(XML_I18N_CONV_FAILED,
2341                     "input conversion failed due to input error, bytes %s\n",
2342                            buf);
2343         }
2344     }
2345     /*
2346      * Ignore when input buffer is not on a boundary
2347      */
2348     if (ret == -3)
2349         ret = 0;
2350     return (c_out? c_out : ret);
2351 }
2352
2353 /**
2354  * xmlCharEncInFunc:
2355  * @handler:    char encoding transformation data structure
2356  * @out:  an xmlBuffer for the output.
2357  * @in:  an xmlBuffer for the input
2358  *
2359  * Generic front-end for the encoding handler input function
2360  *
2361  * Returns the number of byte written if success, or
2362  *     -1 general error
2363  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2364  *        the result of transformation can't fit into the encoding we want), or
2365  */
2366 int
2367 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2368                  xmlBufferPtr in)
2369 {
2370     int ret;
2371     int written;
2372     int toconv;
2373
2374     if (handler == NULL)
2375         return (-1);
2376     if (out == NULL)
2377         return (-1);
2378     if (in == NULL)
2379         return (-1);
2380
2381     toconv = in->use;
2382     if (toconv == 0)
2383         return (0);
2384     written = out->size - out->use -1; /* count '\0' */
2385     if (toconv * 2 >= written) {
2386         xmlBufferGrow(out, out->size + toconv * 2);
2387         written = out->size - out->use - 1;
2388     }
2389     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2390                            in->content, &toconv, 1);
2391     xmlBufferShrink(in, toconv);
2392     out->use += written;
2393     out->content[out->use] = 0;
2394     if (ret == -1)
2395         ret = -3;
2396
2397     switch (ret) {
2398         case 0:
2399 #ifdef DEBUG_ENCODING
2400             xmlGenericError(xmlGenericErrorContext,
2401                             "converted %d bytes to %d bytes of input\n",
2402                             toconv, written);
2403 #endif
2404             break;
2405         case -1:
2406 #ifdef DEBUG_ENCODING
2407             xmlGenericError(xmlGenericErrorContext,
2408                          "converted %d bytes to %d bytes of input, %d left\n",
2409                             toconv, written, in->use);
2410 #endif
2411             break;
2412         case -3:
2413 #ifdef DEBUG_ENCODING
2414             xmlGenericError(xmlGenericErrorContext,
2415                         "converted %d bytes to %d bytes of input, %d left\n",
2416                             toconv, written, in->use);
2417 #endif
2418             break;
2419         case -2: {
2420             char buf[50];
2421
2422             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2423                      in->content[0], in->content[1],
2424                      in->content[2], in->content[3]);
2425             buf[49] = 0;
2426             xmlEncodingErr(XML_I18N_CONV_FAILED,
2427                     "input conversion failed due to input error, bytes %s\n",
2428                            buf);
2429         }
2430     }
2431     /*
2432      * Ignore when input buffer is not on a boundary
2433      */
2434     if (ret == -3)
2435         ret = 0;
2436     return (written? written : ret);
2437 }
2438
2439 #ifdef LIBXML_OUTPUT_ENABLED
2440 /**
2441  * xmlCharEncOutput:
2442  * @output: a parser output buffer
2443  * @init: is this an initialization call without data
2444  *
2445  * Generic front-end for the encoding handler on parser output
2446  * a first call with @init == 1 has to be made first to initiate the
2447  * output in case of non-stateless encoding needing to initiate their
2448  * state or the output (like the BOM in UTF16).
2449  * In case of UTF8 sequence conversion errors for the given encoder,
2450  * the content will be automatically remapped to a CharRef sequence.
2451  *
2452  * Returns the number of byte written if success, or
2453  *     -1 general error
2454  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2455  *        the result of transformation can't fit into the encoding we want), or
2456  */
2457 int
2458 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2459 {
2460     int ret;
2461     size_t written;
2462     int writtentot = 0;
2463     size_t toconv;
2464     int c_in;
2465     int c_out;
2466     xmlBufPtr in;
2467     xmlBufPtr out;
2468
2469     if ((output == NULL) || (output->encoder == NULL) ||
2470         (output->buffer == NULL) || (output->conv == NULL))
2471         return (-1);
2472     out = output->conv;
2473     in = output->buffer;
2474
2475 retry:
2476
2477     written = xmlBufAvail(out);
2478     if (written > 0)
2479         written--; /* count '\0' */
2480
2481     /*
2482      * First specific handling of the initialization call
2483      */
2484     if (init) {
2485         c_in = 0;
2486         c_out = written;
2487         /* TODO: Check return value. */
2488         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2489                           NULL, &c_in);
2490         xmlBufAddLen(out, c_out);
2491 #ifdef DEBUG_ENCODING
2492         xmlGenericError(xmlGenericErrorContext,
2493                 "initialized encoder\n");
2494 #endif
2495         return(c_out);
2496     }
2497
2498     /*
2499      * Conversion itself.
2500      */
2501     toconv = xmlBufUse(in);
2502     if (toconv == 0)
2503         return (writtentot);
2504     if (toconv > 64 * 1024)
2505         toconv = 64 * 1024;
2506     if (toconv * 4 >= written) {
2507         xmlBufGrow(out, toconv * 4);
2508         written = xmlBufAvail(out) - 1;
2509     }
2510     if (written > 256 * 1024)
2511         written = 256 * 1024;
2512
2513     c_in = toconv;
2514     c_out = written;
2515     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2516                             xmlBufContent(in), &c_in);
2517     xmlBufShrink(in, c_in);
2518     xmlBufAddLen(out, c_out);
2519     writtentot += c_out;
2520     if (ret == -1) {
2521         if (c_out > 0) {
2522             /* Can be a limitation of iconv or uconv */
2523             goto retry;
2524         }
2525         ret = -3;
2526     }
2527
2528     /*
2529      * Attempt to handle error cases
2530      */
2531     switch (ret) {
2532         case 0:
2533 #ifdef DEBUG_ENCODING
2534             xmlGenericError(xmlGenericErrorContext,
2535                     "converted %d bytes to %d bytes of output\n",
2536                     c_in, c_out);
2537 #endif
2538             break;
2539         case -1:
2540 #ifdef DEBUG_ENCODING
2541             xmlGenericError(xmlGenericErrorContext,
2542                     "output conversion failed by lack of space\n");
2543 #endif
2544             break;
2545         case -3:
2546 #ifdef DEBUG_ENCODING
2547             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2548                     c_in, c_out, (int) xmlBufUse(in));
2549 #endif
2550             break;
2551         case -4:
2552             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2553                            "xmlCharEncOutFunc: no output function !\n", NULL);
2554             ret = -1;
2555             break;
2556         case -2: {
2557             xmlChar charref[20];
2558             int len = (int) xmlBufUse(in);
2559             xmlChar *content = xmlBufContent(in);
2560             int cur, charrefLen;
2561
2562             cur = xmlGetUTF8Char(content, &len);
2563             if (cur <= 0)
2564                 break;
2565
2566 #ifdef DEBUG_ENCODING
2567             xmlGenericError(xmlGenericErrorContext,
2568                     "handling output conversion error\n");
2569             xmlGenericError(xmlGenericErrorContext,
2570                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2571                     content[0], content[1],
2572                     content[2], content[3]);
2573 #endif
2574             /*
2575              * Removes the UTF8 sequence, and replace it by a charref
2576              * and continue the transcoding phase, hoping the error
2577              * did not mangle the encoder state.
2578              */
2579             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2580                              "&#%d;", cur);
2581             xmlBufShrink(in, len);
2582             xmlBufGrow(out, charrefLen * 4);
2583             c_out = xmlBufAvail(out) - 1;
2584             c_in = charrefLen;
2585             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2586                                     charref, &c_in);
2587
2588             if ((ret < 0) || (c_in != charrefLen)) {
2589                 char buf[50];
2590
2591                 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2592                          content[0], content[1],
2593                          content[2], content[3]);
2594                 buf[49] = 0;
2595                 xmlEncodingErr(XML_I18N_CONV_FAILED,
2596                     "output conversion failed due to conv error, bytes %s\n",
2597                                buf);
2598                 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2599                     content[0] = ' ';
2600                 break;
2601             }
2602
2603             xmlBufAddLen(out, c_out);
2604             writtentot += c_out;
2605             goto retry;
2606         }
2607     }
2608     return(writtentot ? writtentot : ret);
2609 }
2610 #endif
2611
2612 /**
2613  * xmlCharEncOutFunc:
2614  * @handler:    char encoding transformation data structure
2615  * @out:  an xmlBuffer for the output.
2616  * @in:  an xmlBuffer for the input
2617  *
2618  * Generic front-end for the encoding handler output function
2619  * a first call with @in == NULL has to be made firs to initiate the
2620  * output in case of non-stateless encoding needing to initiate their
2621  * state or the output (like the BOM in UTF16).
2622  * In case of UTF8 sequence conversion errors for the given encoder,
2623  * the content will be automatically remapped to a CharRef sequence.
2624  *
2625  * Returns the number of byte written if success, or
2626  *     -1 general error
2627  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2628  *        the result of transformation can't fit into the encoding we want), or
2629  */
2630 int
2631 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2632                   xmlBufferPtr in) {
2633     int ret;
2634     int written;
2635     int writtentot = 0;
2636     int toconv;
2637
2638     if (handler == NULL) return(-1);
2639     if (out == NULL) return(-1);
2640
2641 retry:
2642
2643     written = out->size - out->use;
2644
2645     if (written > 0)
2646         written--; /* Gennady: count '/0' */
2647
2648     /*
2649      * First specific handling of in = NULL, i.e. the initialization call
2650      */
2651     if (in == NULL) {
2652         toconv = 0;
2653         /* TODO: Check return value. */
2654         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2655                           NULL, &toconv);
2656         out->use += written;
2657         out->content[out->use] = 0;
2658 #ifdef DEBUG_ENCODING
2659         xmlGenericError(xmlGenericErrorContext,
2660                 "initialized encoder\n");
2661 #endif
2662         return(0);
2663     }
2664
2665     /*
2666      * Conversion itself.
2667      */
2668     toconv = in->use;
2669     if (toconv == 0)
2670         return(0);
2671     if (toconv * 4 >= written) {
2672         xmlBufferGrow(out, toconv * 4);
2673         written = out->size - out->use - 1;
2674     }
2675     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2676                             in->content, &toconv);
2677     xmlBufferShrink(in, toconv);
2678     out->use += written;
2679     writtentot += written;
2680     out->content[out->use] = 0;
2681     if (ret == -1) {
2682         if (written > 0) {
2683             /* Can be a limitation of iconv or uconv */
2684             goto retry;
2685         }
2686         ret = -3;
2687     }
2688
2689     /*
2690      * Attempt to handle error cases
2691      */
2692     switch (ret) {
2693         case 0:
2694 #ifdef DEBUG_ENCODING
2695             xmlGenericError(xmlGenericErrorContext,
2696                     "converted %d bytes to %d bytes of output\n",
2697                     toconv, written);
2698 #endif
2699             break;
2700         case -1:
2701 #ifdef DEBUG_ENCODING
2702             xmlGenericError(xmlGenericErrorContext,
2703                     "output conversion failed by lack of space\n");
2704 #endif
2705             break;
2706         case -3:
2707 #ifdef DEBUG_ENCODING
2708             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2709                     toconv, written, in->use);
2710 #endif
2711             break;
2712         case -4:
2713             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2714                            "xmlCharEncOutFunc: no output function !\n", NULL);
2715             ret = -1;
2716             break;
2717         case -2: {
2718             xmlChar charref[20];
2719             int len = in->use;
2720             const xmlChar *utf = (const xmlChar *) in->content;
2721             int cur, charrefLen;
2722
2723             cur = xmlGetUTF8Char(utf, &len);
2724             if (cur <= 0)
2725                 break;
2726
2727 #ifdef DEBUG_ENCODING
2728             xmlGenericError(xmlGenericErrorContext,
2729                     "handling output conversion error\n");
2730             xmlGenericError(xmlGenericErrorContext,
2731                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2732                     in->content[0], in->content[1],
2733                     in->content[2], in->content[3]);
2734 #endif
2735             /*
2736              * Removes the UTF8 sequence, and replace it by a charref
2737              * and continue the transcoding phase, hoping the error
2738              * did not mangle the encoder state.
2739              */
2740             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2741                              "&#%d;", cur);
2742             xmlBufferShrink(in, len);
2743             xmlBufferGrow(out, charrefLen * 4);
2744             written = out->size - out->use - 1;
2745             toconv = charrefLen;
2746             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2747                                     charref, &toconv);
2748
2749             if ((ret < 0) || (toconv != charrefLen)) {
2750                 char buf[50];
2751
2752                 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2753                          in->content[0], in->content[1],
2754                          in->content[2], in->content[3]);
2755                 buf[49] = 0;
2756                 xmlEncodingErr(XML_I18N_CONV_FAILED,
2757                     "output conversion failed due to conv error, bytes %s\n",
2758                                buf);
2759                 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2760                     in->content[0] = ' ';
2761                 break;
2762             }
2763
2764             out->use += written;
2765             writtentot += written;
2766             out->content[out->use] = 0;
2767             goto retry;
2768         }
2769     }
2770     return(writtentot ? writtentot : ret);
2771 }
2772
2773 /**
2774  * xmlCharEncCloseFunc:
2775  * @handler:    char encoding transformation data structure
2776  *
2777  * Generic front-end for encoding handler close function
2778  *
2779  * Returns 0 if success, or -1 in case of error
2780  */
2781 int
2782 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2783     int ret = 0;
2784     int tofree = 0;
2785     int i, handler_in_list = 0;
2786
2787     if (handler == NULL) return(-1);
2788     if (handler->name == NULL) return(-1);
2789     if (handlers != NULL) {
2790         for (i = 0;i < nbCharEncodingHandler; i++) {
2791             if (handler == handlers[i]) {
2792                 handler_in_list = 1;
2793                 break;
2794             }
2795         }
2796     }
2797 #ifdef LIBXML_ICONV_ENABLED
2798     /*
2799      * Iconv handlers can be used only once, free the whole block.
2800      * and the associated icon resources.
2801      */
2802     if ((handler_in_list == 0) &&
2803         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2804         tofree = 1;
2805         if (handler->iconv_out != NULL) {
2806             if (iconv_close(handler->iconv_out))
2807                 ret = -1;
2808             handler->iconv_out = NULL;
2809         }
2810         if (handler->iconv_in != NULL) {
2811             if (iconv_close(handler->iconv_in))
2812                 ret = -1;
2813             handler->iconv_in = NULL;
2814         }
2815     }
2816 #endif /* LIBXML_ICONV_ENABLED */
2817 #ifdef LIBXML_ICU_ENABLED
2818     if ((handler_in_list == 0) &&
2819         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2820         tofree = 1;
2821         if (handler->uconv_out != NULL) {
2822             closeIcuConverter(handler->uconv_out);
2823             handler->uconv_out = NULL;
2824         }
2825         if (handler->uconv_in != NULL) {
2826             closeIcuConverter(handler->uconv_in);
2827             handler->uconv_in = NULL;
2828         }
2829     }
2830 #endif
2831     if (tofree) {
2832         /* free up only dynamic handlers iconv/uconv */
2833         if (handler->name != NULL)
2834             xmlFree(handler->name);
2835         handler->name = NULL;
2836         xmlFree(handler);
2837     }
2838 #ifdef DEBUG_ENCODING
2839     if (ret)
2840         xmlGenericError(xmlGenericErrorContext,
2841                 "failed to close the encoding handler\n");
2842     else
2843         xmlGenericError(xmlGenericErrorContext,
2844                 "closed the encoding handler\n");
2845 #endif
2846
2847     return(ret);
2848 }
2849
2850 /**
2851  * xmlByteConsumed:
2852  * @ctxt: an XML parser context
2853  *
2854  * This function provides the current index of the parser relative
2855  * to the start of the current entity. This function is computed in
2856  * bytes from the beginning starting at zero and finishing at the
2857  * size in byte of the file if parsing a file. The function is
2858  * of constant cost if the input is UTF-8 but can be costly if run
2859  * on non-UTF-8 input.
2860  *
2861  * Returns the index in bytes from the beginning of the entity or -1
2862  *         in case the index could not be computed.
2863  */
2864 long
2865 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2866     xmlParserInputPtr in;
2867
2868     if (ctxt == NULL) return(-1);
2869     in = ctxt->input;
2870     if (in == NULL)  return(-1);
2871     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2872         unsigned int unused = 0;
2873         xmlCharEncodingHandler * handler = in->buf->encoder;
2874         /*
2875          * Encoding conversion, compute the number of unused original
2876          * bytes from the input not consumed and subtract that from
2877          * the raw consumed value, this is not a cheap operation
2878          */
2879         if (in->end - in->cur > 0) {
2880             unsigned char convbuf[32000];
2881             const unsigned char *cur = (const unsigned char *)in->cur;
2882             int toconv = in->end - in->cur, written = 32000;
2883
2884             int ret;
2885
2886             do {
2887                 toconv = in->end - cur;
2888                 written = 32000;
2889                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2890                                         cur, &toconv);
2891                 if (ret < 0) {
2892                     if (written > 0)
2893                         ret = -2;
2894                     else
2895                         return(-1);
2896                 }
2897                 unused += written;
2898                 cur += toconv;
2899             } while (ret == -2);
2900         }
2901         if (in->buf->rawconsumed < unused)
2902             return(-1);
2903         return(in->buf->rawconsumed - unused);
2904     }
2905     return(in->consumed + (in->cur - in->base));
2906 }
2907
2908 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2909 #ifdef LIBXML_ISO8859X_ENABLED
2910
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Each converted character produces exactly one
 * output byte. Conversion stops, without error, as soon as @out is full.
 *
 * Layout of @xlattable: bytes 0..31 are indexed by the low 5 bits of a
 * 2-byte lead, bytes 32..47 by the low 4 bits of a 3-byte lead. Both give
 * the number of a 64-byte block stored from offset 48 on, indexed by the
 * low 6 bits of the following byte. For 3-byte sequences the first block
 * yields a second block number. A final value of 0 means the character
 * is not part of the target set.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (invalid UTF-8 or character not in the target set), -3 if the
 * input ends inside a multi-byte sequence, or -1 otherwise.
 * The value of @inlen after return is the number of octets consumed;
 *     only completely converted characters are counted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d = *in++;

        if  (d < 0x80)  {
            /* ASCII is copied unchanged */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }

        /*
         * Never write past the caller's buffer: leave the current
         * character unconsumed so that it is retried on the next call.
         */
        if (out >= outend)
            break;
        *out++ = d;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    if (ret < 0)
        return(ret);
    return(*outlen);
}
3029
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the Unicode code point of the bytes
 *                0x80..0xFF (index is byte - 0x80), 0 for undefined bytes
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early when @out runs short of
 * space; the caller then sees fewer octets consumed than supplied.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 * (NULL argument or undefined code point in the input).
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;
    int tail;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: runs while at least 3 bytes of output are free, which
     * is the longest sequence a table entry can expand to. The test is
     * written on the distance so that no pointer before @out is formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Copy a run of ASCII, limited by the remaining output space. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Bounds test first: *in must not be read at or past instop. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two trailing ASCII bytes still fit in the last free bytes. */
    for (tail = 0; tail < 2; tail++) {
        if ((in < inend) && (out < outend) && (*in < 0x80)) {
            *out++ =  *in++;
        }
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (*outlen);
}
3095
3096
3097 /************************************************************************
3098  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3099  ************************************************************************/
3100
/*
 * Unicode code point for each ISO-8859-2 byte 0x80..0xFF; the index is
 * the byte value minus 0x80. This is the layout ISO8859xToUTF8() reads
 * through its unicodetable argument.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3119
/*
 * UTF-8 to ISO-8859-2 table in the layout read by UTF8ToISO8859x():
 * bytes 0..31 are indexed by the low 5 bits of a 2-byte lead and bytes
 * 32..47 by the low 4 bits of a 3-byte lead. Both give the number of a
 * 64-byte block stored from offset 48 on, indexed by the low 6 bits of
 * the next byte. A final entry of 0 means "not in the character set".
 * The initializer fills the array exactly, without a terminating 0.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3149
/*
 * Unicode code point for each ISO-8859-3 byte 0x80..0xFF; the index is
 * the byte value minus 0x80. Entries of 0x0000 are the ones
 * ISO8859xToUTF8() rejects as undefined code points.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3168
/*
 * UTF-8 to ISO-8859-3 table in the layout read by UTF8ToISO8859x():
 * bytes 0..31 are indexed by the low 5 bits of a 2-byte lead and bytes
 * 32..47 by the low 4 bits of a 3-byte lead. Both give the number of a
 * 64-byte block stored from offset 48 on, indexed by the low 6 bits of
 * the next byte. A final entry of 0 means "not in the character set".
 * The initializer fills the array exactly, without a terminating 0.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3202
/*
 * Unicode code point for each ISO-8859-4 byte 0x80..0xFF; the index is
 * the byte value minus 0x80. This is the layout ISO8859xToUTF8() reads
 * through its unicodetable argument.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3221
/*
 * UTF-8 to ISO-8859-4 table in the layout read by UTF8ToISO8859x():
 * bytes 0..31 are indexed by the low 5 bits of a 2-byte lead and bytes
 * 32..47 by the low 4 bits of a 3-byte lead. Both give the number of a
 * 64-byte block stored from offset 48 on, indexed by the low 6 bits of
 * the next byte. A final entry of 0 means "not in the character set".
 * The initializer fills the array exactly, without a terminating 0.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3251
/*
 * Unicode code point for each ISO-8859-5 byte 0x80..0xFF; the index is
 * the byte value minus 0x80. This is the layout ISO8859xToUTF8() reads
 * through its unicodetable argument.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3270
/*
 * UTF-8 to ISO-8859-5 table in the layout read by UTF8ToISO8859x():
 * bytes 0..31 are indexed by the low 5 bits of a 2-byte lead and bytes
 * 32..47 by the low 4 bits of a 3-byte lead. Both give the number of a
 * 64-byte block stored from offset 48 on, indexed by the low 6 bits of
 * the next byte. A final entry of 0 means "not in the character set".
 * The initializer fills the array exactly, without a terminating 0.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3300
/*
 * Unicode code point for each ISO-8859-6 byte 0x80..0xFF; the index is
 * the byte value minus 0x80. Entries of 0x0000 are the ones
 * ISO8859xToUTF8() rejects as undefined code points.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
3319
/*
 * Presumably the UTF-8 -> ISO-8859-6 lookup table.  Declared as
 * 48 + 5 * 64 = 368 bytes and initialised from 23 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 5 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3320 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3321     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3323     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3329     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3330     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3331     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3337     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3338     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3339     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3340     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3341     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344 };
3345
/*
 * 128-entry unsigned short table for ISO-8859-7.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i.  Entries equal to 0x0000 presumably mark
 * bytes the charset leaves unassigned -- TODO confirm against the
 * decoder that reads this table.
 */
3346 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3347     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3348     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3349     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3350     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3351     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3352     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3353     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3354     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3355     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3356     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3357     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3358     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3359     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3360     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3361     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3362     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3363 };
3364
/*
 * Presumably the UTF-8 -> ISO-8859-7 lookup table.  Declared as
 * 48 + 7 * 64 = 496 bytes and initialised from 31 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 7 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3365 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3366     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3367     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3374     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3375     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3376     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3377     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3383     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3390     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3391     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3392     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3393     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3394     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397 };
3398
/*
 * 128-entry unsigned short table for ISO-8859-8.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i.  Entries equal to 0x0000 presumably mark
 * bytes the charset leaves unassigned -- TODO confirm against the
 * decoder that reads this table.
 */
3399 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3400     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3401     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3402     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3403     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3404     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3405     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3406     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3407     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3408     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3409     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3410     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3411     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3412     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3413     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3414     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3415     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3416 };
3417
/*
 * Presumably the UTF-8 -> ISO-8859-8 lookup table.  Declared as
 * 48 + 7 * 64 = 496 bytes and initialised from 31 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 7 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3418 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3419     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3421     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3427     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3428     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3429     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3430     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3436     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3438     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3443     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3444     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3448     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3449     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450 };
3451
/*
 * 128-entry unsigned short table for ISO-8859-9.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i -- TODO confirm against the decoder that
 * reads this table.  Every entry here is non-zero; the values differ
 * from 0x80 + i only at 0xd0, 0xdd, 0xde, 0xf0, 0xfd and 0xfe.
 */
3452 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3453     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3454     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3455     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3456     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3457     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3458     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3459     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3460     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3461     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3462     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3463     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3464     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3465     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3466     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3467     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3468     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3469 };
3470
/*
 * Presumably the UTF-8 -> ISO-8859-9 lookup table.  Declared as
 * 48 + 5 * 64 = 368 bytes and initialised from 23 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 5 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3471 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3472     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3480     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3481     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3482     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3483     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3484     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3485     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3486     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3487     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3489     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3493     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495 };
3496
/*
 * 128-entry unsigned short table for ISO-8859-10.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i -- TODO confirm against the decoder that
 * reads this table.  Every entry in this table is non-zero.
 */
3497 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3498     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3499     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3500     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3501     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3502     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3503     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3504     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3505     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3506     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3507     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3508     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3509     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3510     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3511     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3512     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3513     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3514 };
3515
/*
 * Presumably the UTF-8 -> ISO-8859-10 lookup table.  Declared as
 * 48 + 7 * 64 = 496 bytes and initialised from 31 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 7 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3516 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3517     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3523     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3524     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3525     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3526     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3527     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3528     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3529     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3530     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3531     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3532     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3533     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3535     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3536     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3545     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3546     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3547     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3548 };
3549
/*
 * 128-entry unsigned short table for ISO-8859-11.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i.  Entries equal to 0x0000 presumably mark
 * bytes the charset leaves unassigned -- TODO confirm against the
 * decoder that reads this table.
 */
3550 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3551     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3552     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3553     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3554     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3555     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3556     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3557     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3558     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3559     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3560     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3561     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3562     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3563     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3564     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3565     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3566     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3567 };
3568
/*
 * Presumably the UTF-8 -> ISO-8859-11 lookup table.  Declared as
 * 48 + 6 * 64 = 432 bytes and initialised from 27 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 6 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3569 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3570     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3578     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3579     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3585     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3586     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3587     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3588     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3589     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3594     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3595     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3596     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597 };
3598
/*
 * 128-entry unsigned short table for ISO-8859-13.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i -- TODO confirm against the decoder that
 * reads this table.  Every entry in this table is non-zero.
 */
3599 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3600     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3601     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3602     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3603     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3604     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3605     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3606     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3607     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3608     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3609     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3610     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3611     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3612     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3613     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3614     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3615     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3616 };
3617
/*
 * Presumably the UTF-8 -> ISO-8859-13 lookup table.  Declared as
 * 48 + 7 * 64 = 496 bytes and initialised from 31 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 7 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3618 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3619     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3627     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3628     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3629     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3630     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3636     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3639     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3640     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3641     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3642     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3643     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3644     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3645     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3646     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3647     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3648     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3649     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3650 };
3651
/*
 * 128-entry unsigned short table for ISO-8859-14.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i -- TODO confirm against the decoder that
 * reads this table.  Every entry in this table is non-zero.
 */
3652 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3653     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3654     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3655     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3656     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3657     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3658     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3659     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3660     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3661     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3662     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3663     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3664     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3665     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3666     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3667     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3668     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3669 };
3670
/*
 * Presumably the UTF-8 -> ISO-8859-14 lookup table.  Declared as
 * 48 + 10 * 64 = 688 bytes and initialised from 43 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 10 blocks of 64 bytes, where small values select
 * the next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3671 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3672     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3680     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3681     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3682     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3687     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3688     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3689     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3692     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3699     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3703     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3704     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3705     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3706     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3707     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3708     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3709     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3710     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3711     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3712     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3713     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3714     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3715 };
3716
/*
 * 128-entry unsigned short table for ISO-8859-15.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i -- TODO confirm against the decoder that
 * reads this table.  Every entry here is non-zero; the values differ
 * from 0x80 + i only at 0xa4, 0xa6, 0xa8, 0xb4, 0xb8, 0xbc, 0xbd, 0xbe.
 */
3717 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3718     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3719     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3720     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3721     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3722     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3723     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3724     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3725     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3726     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3727     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3728     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3729     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3730     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3731     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3732     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3733     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3734 };
3735
/*
 * Presumably the UTF-8 -> ISO-8859-15 lookup table.  Declared as
 * 48 + 6 * 64 = 432 bytes and initialised from 27 adjacent 16-byte
 * string literals that fill it exactly, so the literal's terminating
 * NUL is not stored (legal in C, rejected by C++).
 * NOTE(review): the size expression and the data suggest a 48-byte index
 * header followed by 6 blocks of 64 bytes, where small values select the
 * next block and non-zero bytes in a leaf block are the output byte
 * (0 = no mapping) -- confirm against the lookup code.
 */
3736 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3737     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3745     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3746     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3747     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3748     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3754     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3755     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3757     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3758     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3759     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3760     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3761     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3762     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3763     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3764 };
3765
/*
 * 128-entry unsigned short table for ISO-8859-16.  The first 32 entries
 * are 0x0080..0x009f in order, so entry i presumably holds the Unicode
 * code point of byte 0x80 + i -- TODO confirm against the decoder that
 * reads this table.  Every entry in this table is non-zero.
 */
3766 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3767     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3768     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3769     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3770     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3771     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3772     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3773     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3774     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3775     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3776     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3777     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3778     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3779     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3780     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3781     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3782     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3783 };
3784
/*
 * xmltranscodetable_ISO8859_16:
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_16.  The array
 * holds 48 + 9 * 64 = 624 bytes and is initialised from a string literal
 * of exactly 624 characters (39 rows of 16), so the literal's terminating
 * NUL is not stored in the array.
 * NOTE(review): the size expression suggests a 48-byte leading index
 * followed by nine 64-byte blocks; the actual lookup scheme lives in
 * UTF8ToISO8859x, which is not shown in this section -- confirm there
 * before editing any byte.
 */
3785 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3786     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3787     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3792     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3793     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3794     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3795     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3796     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3797     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3798     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3799     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3800     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3801     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3803     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3804     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3805     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3811     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3812     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3814     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3815     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3816     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3818     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3819     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3820     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3821     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3822     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3823     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3824     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3825 };
3826
3827
3828 /*
3829  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3830  */
3831
3832 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3833     const unsigned char* in, int *inlen) {
3834     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3835 }
3836 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3837     const unsigned char* in, int *inlen) {
3838     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3839 }
3840
3841 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3842     const unsigned char* in, int *inlen) {
3843     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3844 }
3845 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3846     const unsigned char* in, int *inlen) {
3847     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3848 }
3849
3850 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3851     const unsigned char* in, int *inlen) {
3852     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3853 }
3854 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3855     const unsigned char* in, int *inlen) {
3856     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3857 }
3858
3859 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3860     const unsigned char* in, int *inlen) {
3861     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3862 }
3863 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3864     const unsigned char* in, int *inlen) {
3865     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3866 }
3867
3868 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3869     const unsigned char* in, int *inlen) {
3870     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3871 }
3872 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3873     const unsigned char* in, int *inlen) {
3874     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3875 }
3876
3877 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3878     const unsigned char* in, int *inlen) {
3879     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3880 }
3881 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3882     const unsigned char* in, int *inlen) {
3883     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3884 }
3885
3886 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3887     const unsigned char* in, int *inlen) {
3888     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3889 }
3890 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3891     const unsigned char* in, int *inlen) {
3892     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3893 }
3894
3895 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3896     const unsigned char* in, int *inlen) {
3897     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3898 }
3899 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3900     const unsigned char* in, int *inlen) {
3901     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3902 }
3903
3904 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3905     const unsigned char* in, int *inlen) {
3906     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3907 }
3908 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3909     const unsigned char* in, int *inlen) {
3910     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3911 }
3912
3913 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3914     const unsigned char* in, int *inlen) {
3915     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3916 }
3917 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3918     const unsigned char* in, int *inlen) {
3919     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3920 }
3921
3922 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3923     const unsigned char* in, int *inlen) {
3924     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3925 }
3926 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3927     const unsigned char* in, int *inlen) {
3928     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3929 }
3930
3931 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3932     const unsigned char* in, int *inlen) {
3933     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3934 }
3935 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3936     const unsigned char* in, int *inlen) {
3937     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3938 }
3939
3940 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3941     const unsigned char* in, int *inlen) {
3942     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3943 }
3944 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3945     const unsigned char* in, int *inlen) {
3946     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3947 }
3948
3949 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3950     const unsigned char* in, int *inlen) {
3951     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3952 }
3953 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3954     const unsigned char* in, int *inlen) {
3955     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3956 }
3957
3958 static void
3959 xmlRegisterCharEncodingHandlersISO8859x (void) {
3960     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3961     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3962     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3963     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3964     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3965     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3966     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3967     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3968     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3969     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3970     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3971     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3972     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3973     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3974 }
3975
3976 #endif
3977 #endif
3978
3979 #define bottom_encoding
3980 #include "elfgcchack.h"