iconvdata/iso-2022-jp.c

   1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
   2    Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 #include <dlfcn.h>
  22 #include <gconv.h>
  23 #include <stdint.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include "jis0201.h"
  27 #include "jis0208.h"
  28 #include "jis0212.h"
  29 #include "gb2312.h"
  30 #include "ksc5601.h"
  31
  32 struct gap
  33 {
  34   uint16_t start;
  35   uint16_t end;
  36   int32_t idx;
  37 };
  38
  39 #include "iso8859-7jp.h"
  40
  41 /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
  42 #define ESC 0x1b
  43
  44 /* We provide our own initialization and destructor function.  */
  45 #define DEFINE_INIT     0
  46 #define DEFINE_FINI     0
  47
  48 /* Definitions used in the body of the `gconv' function.  */
  49 #define FROM_LOOP               from_iso2022jp_loop
  50 #define TO_LOOP                 to_iso2022jp_loop
  51 #define MIN_NEEDED_FROM         1
  52 #define MAX_NEEDED_FROM         4
  53 #define MIN_NEEDED_TO           4
  54 #define MAX_NEEDED_TO           4
  55 #define FROM_DIRECTION          (dir == from_iso2022jp)
  56 #define PREPARE_LOOP \
  57   enum direction dir = ((struct iso2022jp_data *) step->__data)->dir;         \
  58   enum variant var = ((struct iso2022jp_data *) step->__data)->var;           \
  59   int save_set;                                                               \
  60   int *setp = &data->__statep->__count;
  61 #define EXTRA_LOOP_ARGS         , var, setp
  62
  63
  64 /* Direction of the transformation.  */
  65 enum direction
  66 {
  67   illegal_dir,
  68   to_iso2022jp,
  69   from_iso2022jp
  70 };
  71
  72 /* We handle ISO-2022-jp and ISO-2022-JP-2 here.  */
  73 enum variant
  74 {
  75   illegal_var,
  76   iso2022jp,
  77   iso2022jp2
  78 };
  79
  80
  81 struct iso2022jp_data
  82 {
  83   enum direction dir;
  84   enum variant var;
  85 };
  86
  87
  88 /* The COUNT element of the state keeps track of the currently selected
  89    character set.  The possible values are:  */
  90 enum
  91 {
  92   ASCII_set = 0,
  93   JISX0208_1978_set = 1 << 3,
  94   JISX0208_1983_set = 2 << 3,
  95   JISX0201_Roman_set = 3 << 3,
  96   JISX0201_Kana_set = 4 << 3,
  97   GB2312_set = 5 << 3,
  98   KSC5601_set = 6 << 3,
  99   JISX0212_set = 7 << 3,
 100   CURRENT_SEL_MASK = 7 << 3
 101 };
 102
 103 /* The second value stored is the designation of the G2 set.  The following
 104    values are possible:  */
 105 enum
 106 {
 107   UNSPECIFIED_set = 0,
 108   ISO88591_set = 1 << 6,
 109   ISO88597_set = 2 << 6,
 110   CURRENT_ASSIGN_MASK = 3 << 6
 111 };
 112
 113 /* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
 114    describes the language tag parsing status.  The possible values are as
 115    follows.  Values >= TAG_language are temporary tag parsing states.  */
 116 enum
 117 {
 118   TAG_none = 0,
 119   TAG_language = 4 << 8,
 120   TAG_language_j = 5 << 8,
 121   TAG_language_ja = 1 << 8,
 122   TAG_language_k = 6 << 8,
 123   TAG_language_ko = 2 << 8,
 124   TAG_language_z = 7 << 8,
 125   TAG_language_zh = 3 << 8,
 126   CURRENT_TAG_MASK = 7 << 8
 127 };
 128
 129
 130 extern int gconv_init (struct __gconv_step *step);
 131 int
 132 gconv_init (struct __gconv_step *step)
 133 {
 134   /* Determine which direction.  */
 135   struct iso2022jp_data *new_data;
 136   enum direction dir = illegal_dir;
 137   enum variant var = illegal_var;
 138   int result;
 139
 140   if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
 141     {
 142       dir = from_iso2022jp;
 143       var = iso2022jp;
 144     }
 145   else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
 146     {
 147       dir = to_iso2022jp;
 148       var = iso2022jp;
 149     }
 150   else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
 151     {
 152       dir = from_iso2022jp;
 153       var = iso2022jp2;
 154     }
 155   else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
 156     {
 157       dir = to_iso2022jp;
 158       var = iso2022jp2;
 159     }
 160
 161   result = __GCONV_NOCONV;
 162   if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
 163     {
 164       new_data
 165         = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
 166
 167       result = __GCONV_NOMEM;
 168       if (new_data != NULL)
 169         {
 170           new_data->dir = dir;
 171           new_data->var = var;
 172           step->__data = new_data;
 173
 174           if (dir == from_iso2022jp)
 175             {
 176               step->__min_needed_from = MIN_NEEDED_FROM;
 177               step->__max_needed_from = MAX_NEEDED_FROM;
 178               step->__min_needed_to = MIN_NEEDED_TO;
 179               step->__max_needed_to = MAX_NEEDED_TO;
 180             }
 181           else
 182             {
 183               step->__min_needed_from = MIN_NEEDED_TO;
 184               step->__max_needed_from = MAX_NEEDED_TO;
 185               step->__min_needed_to = MIN_NEEDED_FROM;
 186               step->__max_needed_to = MAX_NEEDED_FROM + 2;
 187             }
 188
 189           /* Yes, this is a stateful encoding.  */
 190           step->__stateful = 1;
 191
 192           result = __GCONV_OK;
 193         }
 194     }
 195
 196   return result;
 197 }
 198
 199
 200 extern void gconv_end (struct __gconv_step *data);
 201 void
 202 gconv_end (struct __gconv_step *data)
 203 {
 204   free (data->__data);
 205 }
 206
 207
 208 /* Since this is a stateful encoding we have to provide code which resets
 209    the output state to the initial state.  This has to be done during the
 210    flushing.  */
 211 #define EMIT_SHIFT_TO_INIT \
 212   /* Avoid warning about unused variable 'var'.  */                           \
 213   (void) var;                                                                 \
 214                                                                               \
 215   if ((data->__statep->__count & ~7) != ASCII_set)                            \
 216     {                                                                         \
 217       if (dir == from_iso2022jp                                               \
 218           || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set)       \
 219         {                                                                     \
 220           /* It's easy, we don't have to emit anything, we just reset the     \
 221              state for the input.  Note that this also clears the G2          \
 222              designation.  */                                                 \
 223           data->__statep->__count &= 7;                                       \
 224           data->__statep->__count |= ASCII_set;                               \
 225         }                                                                     \
 226       else                                                                    \
 227         {                                                                     \
 228           /* We are not in the initial state.  To switch back we have         \
 229              to emit the sequence `Esc ( B'.  */                              \
 230           if (__builtin_expect (outbuf + 3 > outend, 0))                      \
 231             /* We don't have enough room in the output buffer.  */            \
 232             status = __GCONV_FULL_OUTPUT;                                     \
 233           else                                                                \
 234             {                                                                 \
 235               /* Write out the shift sequence.  */                            \
 236               *outbuf++ = ESC;                                                \
 237               *outbuf++ = '(';                                                \
 238               *outbuf++ = 'B';                                                \
 239               /* Note that this also clears the G2 designation.  */           \
 240               data->__statep->__count &= ~7;                                  \
 241               data->__statep->__count |= ASCII_set;                           \
 242             }                                                                 \
 243         }                                                                     \
 244     }
 245
 246
 247 /* Since we might have to reset input pointer we must be able to save
 248    and retore the state.  */
 249 #define SAVE_RESET_STATE(Save) \
 250   if (Save)                                                                   \
 251     save_set = *setp;                                                         \
 252   else                                                                        \
 253     *setp = save_set
 254
 255
  256 /* First define the conversion function from ISO-2022-JP to UCS4.

         The macros from here up to the inclusion of <iconv/loop.c> describe
         the loop named FROM_LOOP.  BODY deals with one input unit at a time:

         - An ESC which starts a known designation updates `set'
           (ESC ( B, ESC ( J, ESC $ @, ESC $ B; for iso2022jp2 also ESC ( I,
           ESC $ A, ESC $ ( C and ESC $ ( D) or `set2' (ESC . A and ESC . F,
           iso2022jp2 only).  The sequence is consumed and nothing is
           written.
         - ESC N (iso2022jp2 only) decodes the byte after it through the set
           recorded in `set2'.
         - Any other byte >= 0x80 is illegal.
         - Bytes below 0x21 and the byte 0x7f are passed on unchanged
           whatever set is selected; this includes an ESC which did not
           start one of the sequences above.  All remaining bytes are
           decoded according to `set'.

         Every decoded character is stored as four bytes with put32.  If
         fewer than three bytes are available at an ESC (four for ESC $ ( in
         iso2022jp2), or one of the two-byte table functions returns 0, the
         loop stops with __GCONV_INCOMPLETE_INPUT.  Input which cannot be
         decoded stops the loop with __GCONV_ILLEGAL_INPUT unless
         ignore_errors_p () is true, in which case it is skipped and counted
         in *irreversible.

         INIT_PARAMS splits the state word behind `setp' into `set' and
         `set2'; UPDATE_PARAMS stores their combination back.

         NOTE(review): inptr, inend, outptr, result, irreversible,
         ignore_errors_p and put32 are not defined in this file; presumably
         <iconv/loop.c> and the headers it uses provide them -- confirm.  */
  257 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
  258 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
  259 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
  260 #define LOOPFCT                 FROM_LOOP
  261 #define BODY \
  262   {                                                                           \
  263     uint32_t ch = *inptr;                                                     \
  264                                                                               \
  265     /* Recognize escape sequences.  */                                        \
  266     if (__builtin_expect (ch, 0) == ESC)                                      \
  267       {                                                                       \
  268         /* We now must be prepared to read two to three more                  \
  269            characters.  If we have a match in the first character but         \
  270            then the input buffer ends we terminate with an error since        \
  271            we must not risk missing an escape sequence just because it        \
  272            is not entirely in the current input buffer.  */                   \
  273         if (__builtin_expect (inptr + 2 >= inend, 0)                          \
  274             || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '('       \
  275                 && __builtin_expect (inptr + 3 >= inend, 0)))                 \
  276           {                                                                   \
  277             /* Not enough input available.  */                                \
  278             result = __GCONV_INCOMPLETE_INPUT;                                \
  279             break;                                                            \
  280           }                                                                   \
  281                                                                               \
  282         if (inptr[1] == '(')                                                  \
  283           {                                                                   \
  284             if (inptr[2] == 'B')                                              \
  285               {                                                               \
  286                 /* ASCII selected.  */                                        \
  287                 set = ASCII_set;                                              \
  288                 inptr += 3;                                                   \
  289                 continue;                                                     \
  290               }                                                               \
  291             else if (inptr[2] == 'J')                                         \
  292               {                                                               \
  293                 /* JIS X 0201 Roman selected.  */                             \
  294                 set = JISX0201_Roman_set;                                     \
  295                 inptr += 3;                                                   \
  296                 continue;                                                     \
  297               }                                                               \
  298             else if (var == iso2022jp2 && inptr[2] == 'I')                    \
  299               {                                                               \
  300                 /* JIS X 0201 Kana selected.  */                              \
  301                 set = JISX0201_Kana_set;                                      \
  302                 inptr += 3;                                                   \
  303                 continue;                                                     \
  304               }                                                               \
  305           }                                                                   \
  306         else if (inptr[1] == '$')                                             \
  307           {                                                                   \
  308             if (inptr[2] == '@')                                              \
  309               {                                                               \
  310                 /* JIS X 0208-1978 selected.  */                              \
  311                 set = JISX0208_1978_set;                                      \
  312                 inptr += 3;                                                   \
  313                 continue;                                                     \
  314               }                                                               \
  315             else if (inptr[2] == 'B')                                         \
  316               {                                                               \
  317                 /* JIS X 0208-1983 selected.  */                              \
  318                 set = JISX0208_1983_set;                                      \
  319                 inptr += 3;                                                   \
  320                 continue;                                                     \
  321               }                                                               \
  322             else if (var == iso2022jp2)                                       \
  323               {                                                               \
  324                 if (inptr[2] == 'A')                                          \
  325                   {                                                           \
  326                     /* GB 2312-1980 selected.  */                             \
  327                     set = GB2312_set;                                         \
  328                     inptr += 3;                                               \
  329                     continue;                                                 \
  330                   }                                                           \
  331                 else if (inptr[2] == '(')                                     \
  332                   {                                                           \
  333                     if (inptr[3] == 'C')                                      \
  334                       {                                                       \
  335                         /* KSC 5601-1987 selected.  */                        \
  336                         set = KSC5601_set;                                    \
  337                         inptr += 4;                                           \
  338                         continue;                                             \
  339                       }                                                       \
  340                     else if (inptr[3] == 'D')                                 \
  341                       {                                                       \
  342                         /* JIS X 0212-1990 selected.  */                      \
  343                         set = JISX0212_set;                                   \
  344                         inptr += 4;                                           \
  345                         continue;                                             \
  346                       }                                                       \
  347                   }                                                           \
  348               }                                                               \
  349           }                                                                   \
  350         else if (var == iso2022jp2 && inptr[1] == '.')                        \
  351           {                                                                   \
  352             if (inptr[2] == 'A')                                              \
  353               {                                                               \
  354                 /* ISO 8859-1-GR selected.  */                                \
  355                 set2 = ISO88591_set;                                          \
  356                 inptr += 3;                                                   \
  357                 continue;                                                     \
  358               }                                                               \
  359             else if (inptr[2] == 'F')                                         \
  360               {                                                               \
  361                 /* ISO 8859-7-GR selected.  */                                \
  362                 set2 = ISO88597_set;                                          \
  363                 inptr += 3;                                                   \
  364                 continue;                                                     \
  365               }                                                               \
  366           }                                                                   \
  367       }                                                                       \
  368                                                                               \
  369     if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N')                    \
  370       {                                                                       \
  371         if (set2 == ISO88591_set)                                             \
  372           {                                                                   \
  373             ch = inptr[2] | 0x80;                                             \
  374             inptr += 3;                                                       \
  375           }                                                                   \
  376         else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)       \
  377           {                                                                   \
  378             /* We use the table from the ISO 8859-7 module.  */               \
  379             if (inptr[2] < 0x20 || inptr[2] >= 0x80)                          \
  380               {                                                               \
  381                 if (! ignore_errors_p ())                                     \
  382                   {                                                           \
  383                     result = __GCONV_ILLEGAL_INPUT;                           \
  384                     break;                                                    \
  385                   }                                                           \
  386                                                                               \
  387                 ++inptr;                                                      \
  388                 ++*irreversible;                                              \
  389                 continue;                                                     \
  390               }                                                               \
  391             ch = iso88597_to_ucs4[inptr[2] - 0x20];                           \
  392             if (ch == 0)                                                      \
  393               {                                                               \
  394                 if (! ignore_errors_p ())                                     \
  395                   {                                                           \
  396                     result = __GCONV_ILLEGAL_INPUT;                           \
  397                     break;                                                    \
  398                   }                                                           \
  399                                                                               \
  400                 inptr += 3;                                                   \
  401                 ++*irreversible;                                              \
  402                 continue;                                                     \
  403               }                                                               \
  404             inptr += 3;                                                       \
  405           }                                                                   \
  406         else                                                                  \
  407           {                                                                   \
  408             if (! ignore_errors_p ())                                         \
  409               {                                                               \
  410                 result = __GCONV_ILLEGAL_INPUT;                               \
  411                 break;                                                        \
  412               }                                                               \
  413                                                                               \
  414             ++inptr;                                                          \
  415             ++*irreversible;                                                  \
  416             continue;                                                         \
  417           }                                                                   \
  418       }                                                                       \
  419     else if (ch >= 0x80)                                                      \
  420       {                                                                       \
  421         if (! ignore_errors_p ())                                             \
  422           {                                                                   \
  423             result = __GCONV_ILLEGAL_INPUT;                                   \
  424             break;                                                            \
  425           }                                                                   \
  426                                                                               \
  427         ++inptr;                                                              \
  428         ++*irreversible;                                                      \
  429         continue;                                                             \
  430       }                                                                       \
  431     else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))                   \
  432       /* Almost done, just advance the input pointer.  */                     \
  433       ++inptr;                                                                \
  434     else if (set == JISX0201_Roman_set)                                       \
  435       {                                                                       \
  436         /* Use the JIS X 0201 table.  */                                      \
  437         ch = jisx0201_to_ucs4 (ch);                                           \
  438         if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR)                 \
  439           {                                                                   \
  440             if (! ignore_errors_p ())                                         \
  441               {                                                               \
  442                 result = __GCONV_ILLEGAL_INPUT;                               \
  443                 break;                                                        \
  444               }                                                               \
  445                                                                               \
  446             ++inptr;                                                          \
  447             ++*irreversible;                                                  \
  448             continue;                                                         \
  449           }                                                                   \
  450         ++inptr;                                                              \
  451       }                                                                       \
  452     else if (set == JISX0201_Kana_set)                                        \
  453       {                                                                       \
  454         /* Use the JIS X 0201 table.  */                                      \
  455         ch = jisx0201_to_ucs4 (ch + 0x80);                                    \
  456         if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR)                 \
  457           {                                                                   \
  458             if (! ignore_errors_p ())                                         \
  459               {                                                               \
  460                 result = __GCONV_ILLEGAL_INPUT;                               \
  461                 break;                                                        \
  462               }                                                               \
  463                                                                               \
  464             ++inptr;                                                          \
  465             ++*irreversible;                                                  \
  466             continue;                                                         \
  467           }                                                                   \
  468         ++inptr;                                                              \
  469       }                                                                       \
  470     else                                                                      \
  471       {                                                                       \
  472         if (set == JISX0208_1978_set || set == JISX0208_1983_set)             \
  473           /* XXX I don't have the tables for these two old variants of        \
  474              JIS X 0208.  Therefore I'm using the tables for JIS X            \
  475              0208-1990.  If somebody has problems with this please            \
  476              provide the appropriate tables.  */                              \
  477           ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);                   \
  478         else if (set == JISX0212_set)                                         \
  479           /* Use the JIS X 0212 table.  */                                    \
  480           ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0);                   \
  481         else if (set == GB2312_set)                                           \
  482           /* Use the GB 2312 table.  */                                       \
  483           ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);                     \
  484         else                                                                  \
  485           {                                                                   \
  486             assert (set == KSC5601_set);                                      \
  487                                                                               \
  488             /* Use the KSC 5601 table.  */                                    \
  489             ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);                  \
  490           }                                                                   \
  491                                                                               \
  492         if (__builtin_expect (ch, 1) == 0)                                    \
  493           {                                                                   \
  494             result = __GCONV_INCOMPLETE_INPUT;                                \
  495             break;                                                            \
  496           }                                                                   \
  497         else if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR)            \
  498           {                                                                   \
  499             if (! ignore_errors_p ())                                         \
  500               {                                                               \
  501                 result = __GCONV_ILLEGAL_INPUT;                               \
  502                 break;                                                        \
  503               }                                                               \
  504                                                                               \
  505             ++inptr;                                                          \
  506             ++*irreversible;                                                  \
  507             continue;                                                         \
  508           }                                                                   \
  509       }                                                                       \
  510                                                                               \
  511     put32 (outptr, ch);                                                       \
  512     outptr += 4;                                                              \
  513   }
  514 #define LOOP_NEED_FLAGS
  515 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
  516 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
  517                                 int set2 = *setp & CURRENT_ASSIGN_MASK
  518 #define UPDATE_PARAMS           *setp = set | set2
  519 #include <iconv/loop.c>
 520
 521
 522 /* Next, define the other direction.  */
 523
 524 enum conversion { none = 0, european, japanese, chinese, korean, other };
 525
 526 /* A datatype for conversion lists.  A list holds up to five
        `enum conversion' values packed into one unsigned int, three bits per
        entry, with the first entry in the least significant bits.  */
 527 typedef unsigned int cvlist_t;
     /* Build a list from five entries; CV1 becomes the head.  The entries
        are combined with `+', which only equals a bitwise OR as long as
        every entry fits into three bits.  */
 528 #define CVLIST(cv1, cv2, cv3, cv4, cv5) \
 529   ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
     /* Head of the list: its lowest three bits.  */
 530 #define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
     /* The list without its head: the next entry moves into the low bits.  */
 531 #define CVLIST_REST(cvl) ((cvl) >> 3)
 532 static const cvlist_t conversion_lists[4] =
 533   {
         /* Search order of the character set families, one row per language
            tag state named in the row comments.  The BODY of the TO_LOOP
            selects a row with conversion_lists[tag >> 8] when var is
            iso2022jp2.  Each row puts the tagged language's own family
            first, then `european', then the remaining CJK families, and
            ends with `other'; TAG_none uses the same order as
            TAG_language_ja.
            NOTE(review): the row order relies on the values of the TAG_*
            constants, which are defined elsewhere -- confirm that
            `tag >> 8' yields 0..3 in this order.  */
 534     /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
 535     /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
 536     /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
 537     /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
 538   };
 539
 540 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
     /* Loop parameters for the direction handled by TO_LOOP
        (to_iso2022jp_loop).  The BODY that follows reads each input
        character with get32 and advances inptr by 4; the output is the
        byte-oriented side, hence the *_FROM sizes below.  */
 541 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
     /* Two bytes more than MAX_NEEDED_FROM.  Presumably this is sized for
        the longest output of a single iteration of the BODY that follows,
        e.g. `ESC . A' followed by `ESC N' and one byte, six bytes in total
        -- confirm against loop.c.  */
 542 #define MAX_NEEDED_OUTPUT       (MAX_NEEDED_FROM + 2)
     /* Name of the loop function to be generated for this direction.  */
 543 #define LOOPFCT                 TO_LOOP
 544 #define BODY \
 545   {                                                                           \
 546     uint32_t ch;                                                              \
 547     size_t written;                                                           \
 548                                                                               \
 549     ch = get32 (inptr);                                                       \
 550                                                                               \
 551     if (var == iso2022jp2)                                                    \
 552       {                                                                       \
 553         /* Handle Unicode tag characters (range U+E0000..U+E007F).  */        \
 554         if (__builtin_expect ((ch >> 7) == (0xe0000 >> 7), 0))                \
 555           {                                                                   \
 556             ch &= 0x7f;                                                       \
 557             if (ch >= 'A' && ch <= 'Z')                                       \
 558               ch += 'a' - 'A';                                                \
 559             if (ch == 0x01)                                                   \
 560               tag = TAG_language;                                             \
 561             else if (ch == 'j' && tag == TAG_language)                        \
 562               tag = TAG_language_j;                                           \
 563             else if (ch == 'a' && tag == TAG_language_j)                      \
 564               tag = TAG_language_ja;                                          \
 565             else if (ch == 'k' && tag == TAG_language)                        \
 566               tag = TAG_language_k;                                           \
 567             else if (ch == 'o' && tag == TAG_language_k)                      \
 568               tag = TAG_language_ko;                                          \
 569             else if (ch == 'z' && tag == TAG_language)                        \
 570               tag = TAG_language_z;                                           \
 571             else if (ch == 'h' && tag == TAG_language_z)                      \
 572               tag = TAG_language_zh;                                          \
 573             else if (ch == 0x7f)                                              \
 574               tag = TAG_none;                                                 \
 575             else                                                              \
 576               {                                                               \
 577                 /* Other tag characters reset the tag parsing state (if the   \
 578                    current state is a temporary state) or are ignored (if     \
 579                    the current state is a stable one).  */                    \
 580                 if (tag >= TAG_language)                                      \
 581                   tag = TAG_none;                                             \
 582               }                                                               \
 583                                                                               \
 584             inptr += 4;                                                       \
 585             continue;                                                         \
 586           }                                                                   \
 587                                                                               \
 588         /* Non-tag characters reset the tag parsing state, if the current     \
 589            state is a temporary state.  */                                    \
 590         if (__builtin_expect (tag >= TAG_language, 0))                        \
 591           tag = TAG_none;                                                     \
 592       }                                                                       \
 593                                                                               \
 594     /* First see whether we can write the character using the currently       \
 595        selected character set.  But ignore the selected character set if      \
 596        the current language tag shows different preferences.  */              \
 597     if (set == ASCII_set)                                                     \
 598       {                                                                       \
 599         /* Please note that the NUL byte is *not* matched if we are not       \
 600            currently using the ASCII charset.  This is because we must        \
 601            switch to the initial state whenever a NUL byte is written.  */    \
 602         if (ch <= 0x7f)                                                       \
 603           {                                                                   \
 604             *outptr++ = ch;                                                   \
 605             written = 1;                                                      \
 606                                                                               \
 607             /* At the beginning of a line, G2 designation is cleared.  */     \
 608             if (var == iso2022jp2 && ch == 0x0a)                              \
 609               set2 = UNSPECIFIED_set;                                         \
 610           }                                                                   \
 611         else                                                                  \
 612           written = __UNKNOWN_10646_CHAR;                                     \
 613       }                                                                       \
 614     /* ISO-2022-JP recommends to encode the newline character always in       \
 615        ASCII since this allows a context-free interpretation of the           \
 616        characters at the beginning of the next line.  Otherwise it would      \
 617        have to be known whether the last line ended using ASCII or            \
 618        JIS X 0201.  */                                                        \
 619     else if (set == JISX0201_Roman_set                                        \
 620              && (__builtin_expect (tag == TAG_none, 1)                        \
 621                  || tag == TAG_language_ja))                                  \
 622       {                                                                       \
 623         unsigned char buf[1];                                                 \
 624         written = ucs4_to_jisx0201 (ch, buf);                                 \
 625         if (written != __UNKNOWN_10646_CHAR)                                  \
 626           {                                                                   \
 627             if (buf[0] > 0x20 && buf[0] < 0x80)                               \
 628               {                                                               \
 629                 *outptr++ = buf[0];                                           \
 630                 written = 1;                                                  \
 631               }                                                               \
 632             else                                                              \
 633               written = __UNKNOWN_10646_CHAR;                                 \
 634           }                                                                   \
 635       }                                                                       \
 636     else if (set == JISX0201_Kana_set                                         \
 637              && (__builtin_expect (tag == TAG_none, 1)                        \
 638                  || tag == TAG_language_ja))                                  \
 639       {                                                                       \
 640         unsigned char buf[1];                                                 \
 641         written = ucs4_to_jisx0201 (ch, buf);                                 \
 642         if (written != __UNKNOWN_10646_CHAR)                                  \
 643           {                                                                   \
 644             if (buf[0] > 0xa0 && buf[0] < 0xe0)                               \
 645               {                                                               \
 646                 *outptr++ = buf[0] - 0x80;                                    \
 647                 written = 1;                                                  \
 648               }                                                               \
 649             else                                                              \
 650               written = __UNKNOWN_10646_CHAR;                                 \
 651           }                                                                   \
 652       }                                                                       \
 653     else                                                                      \
 654       {                                                                       \
 655         if ((set == JISX0208_1978_set || set == JISX0208_1983_set)            \
 656             && (__builtin_expect (tag == TAG_none, 1)                         \
 657                 || tag == TAG_language_ja))                                   \
 658           written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);           \
 659         else if (set == JISX0212_set                                          \
 660                  && (__builtin_expect (tag == TAG_none, 1)                    \
 661                      || tag == TAG_language_ja))                              \
 662           written = ucs4_to_jisx0212 (ch, outptr, outend - outptr);           \
 663         else if (set == GB2312_set                                            \
 664                  && (__builtin_expect (tag == TAG_none, 1)                    \
 665                      || tag == TAG_language_zh))                              \
 666           written = ucs4_to_gb2312 (ch, outptr, outend - outptr);             \
 667         else if (set == KSC5601_set                                           \
 668                  && (__builtin_expect (tag == TAG_none, 1)                    \
 669                      || tag == TAG_language_ko))                              \
 670           written = ucs4_to_ksc5601 (ch, outptr, outend - outptr);            \
 671         else                                                                  \
 672           written = __UNKNOWN_10646_CHAR;                                     \
 673                                                                               \
 674         if (__builtin_expect (written == 0, 0))                               \
 675           {                                                                   \
 676             result = __GCONV_FULL_OUTPUT;                                     \
 677             break;                                                            \
 678           }                                                                   \
 679         else if (written != __UNKNOWN_10646_CHAR)                             \
 680           outptr += written;                                                  \
 681       }                                                                       \
 682                                                                               \
 683     if (written == __UNKNOWN_10646_CHAR                                       \
 684         && __builtin_expect (tag == TAG_none, 1))                             \
 685       {                                                                       \
 686         if (set2 == ISO88591_set)                                             \
 687           {                                                                   \
 688             if (ch >= 0x80 && ch <= 0xff)                                     \
 689               {                                                               \
 690                 if (__builtin_expect (outptr + 3 > outend, 0))                \
 691                   {                                                           \
 692                     result = __GCONV_FULL_OUTPUT;                             \
 693                     break;                                                    \
 694                   }                                                           \
 695                                                                               \
 696                 *outptr++ = ESC;                                              \
 697                 *outptr++ = 'N';                                              \
 698                 *outptr++ = ch & 0x7f;                                        \
 699                 written = 3;                                                  \
 700               }                                                               \
 701           }                                                                   \
 702         else if (set2 == ISO88597_set)                                        \
 703           {                                                                   \
 704             const struct gap *rp = from_idx;                                  \
 705                                                                               \
 706             while (ch > rp->end)                                              \
 707               ++rp;                                                           \
 708             if (ch >= rp->start)                                              \
 709               {                                                               \
 710                 unsigned char res = iso88597_from_ucs4[ch - 0xa0 + rp->idx];  \
 711                 if (res != '\0')                                              \
 712                   {                                                           \
 713                     if (__builtin_expect (outptr + 3 > outend, 0))            \
 714                       {                                                       \
 715                         result = __GCONV_FULL_OUTPUT;                         \
 716                         break;                                                \
 717                       }                                                       \
 718                                                                               \
 719                     *outptr++ = ESC;                                          \
 720                     *outptr++ = 'N';                                          \
 721                     *outptr++ = res;                                          \
 722                     written = 3;                                              \
 723                   }                                                           \
 724               }                                                               \
 725           }                                                                   \
 726       }                                                                       \
 727                                                                               \
 728     if (written == __UNKNOWN_10646_CHAR)                                      \
 729       {                                                                       \
 730         /* The attempts to use the currently selected character set           \
 731            failed, either because the language tag changed, or because        \
 732            the character requires a different character set, or because       \
 733            the character is unknown.                                          \
 734            The CJK character sets partially overlap when seen as subsets      \
 735            of ISO 10646; therefore there is no single correct result.         \
 736            We use a preference order which depends on the language tag.  */   \
 737                                                                               \
 738         if (ch <= 0x7f)                                                       \
 739           {                                                                   \
 740             /* We must encode using ASCII.  First write out the               \
 741                escape sequence.  */                                           \
 742             if (__builtin_expect (outptr + 3 > outend, 0))                    \
 743               {                                                               \
 744                 result = __GCONV_FULL_OUTPUT;                                 \
 745                 break;                                                        \
 746               }                                                               \
 747                                                                               \
 748             *outptr++ = ESC;                                                  \
 749             *outptr++ = '(';                                                  \
 750             *outptr++ = 'B';                                                  \
 751             set = ASCII_set;                                                  \
 752                                                                               \
 753             if (__builtin_expect (outptr + 1 > outend, 0))                    \
 754               {                                                               \
 755                 result = __GCONV_FULL_OUTPUT;                                 \
 756                 break;                                                        \
 757               }                                                               \
 758             *outptr++ = ch;                                                   \
 759                                                                               \
 760             /* At the beginning of a line, G2 designation is cleared.  */     \
 761             if (var == iso2022jp2 && ch == 0x0a)                              \
 762               set2 = UNSPECIFIED_set;                                         \
 763           }                                                                   \
 764         else                                                                  \
 765           {                                                                   \
 766             /* Now it becomes difficult.  We must search the other            \
 767                character sets one by one.  Use an ordered conversion          \
 768                list that depends on the current language tag.  */             \
 769             cvlist_t conversion_list;                                         \
 770             unsigned char buf[2];                                             \
 771                                                                               \
 772             result = __GCONV_ILLEGAL_INPUT;                                   \
 773                                                                               \
 774             if (var == iso2022jp2)                                            \
 775               conversion_list = conversion_lists[tag >> 8];                   \
 776             else                                                              \
 777               conversion_list = CVLIST (japanese, 0, 0, 0, 0);                \
 778                                                                               \
 779             do                                                                \
 780               switch (CVLIST_FIRST (conversion_list))                         \
 781                 {                                                             \
 782                 case european:                                                \
 783                                                                               \
 784                   /* Try ISO 8859-1 upper half.   */                          \
 785                   if (ch >= 0x80 && ch <= 0xff)                               \
 786                     {                                                         \
 787                       if (set2 != ISO88591_set)                               \
 788                         {                                                     \
 789                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 790                             {                                                 \
 791                               result = __GCONV_FULL_OUTPUT;                   \
 792                               break;                                          \
 793                             }                                                 \
 794                           *outptr++ = ESC;                                    \
 795                           *outptr++ = '.';                                    \
 796                           *outptr++ = 'A';                                    \
 797                           set2 = ISO88591_set;                                \
 798                         }                                                     \
 799                                                                               \
 800                       if (__builtin_expect (outptr + 3 > outend, 0))          \
 801                         {                                                     \
 802                           result = __GCONV_FULL_OUTPUT;                       \
 803                           break;                                              \
 804                         }                                                     \
 805                       *outptr++ = ESC;                                        \
 806                       *outptr++ = 'N';                                        \
 807                       *outptr++ = ch - 0x80;                                  \
 808                       result = __GCONV_OK;                                    \
 809                       break;                                                  \
 810                     }                                                         \
 811                                                                               \
 812                   /* Try ISO 8859-7 upper half.  */                           \
 813                   {                                                           \
 814                     const struct gap *rp = from_idx;                          \
 815                                                                               \
 816                     while (ch > rp->end)                                      \
 817                       ++rp;                                                   \
 818                     if (ch >= rp->start)                                      \
 819                       {                                                       \
 820                         unsigned char res =                                   \
 821                           iso88597_from_ucs4[ch - 0xa0 + rp->idx];            \
 822                         if (res != '\0')                                      \
 823                           {                                                   \
 824                             if (set2 != ISO88597_set)                         \
 825                               {                                               \
 826                                 if (__builtin_expect (outptr + 3 > outend, 0))\
 827                                   {                                           \
 828                                     result = __GCONV_FULL_OUTPUT;             \
 829                                     break;                                    \
 830                                   }                                           \
 831                                 *outptr++ = ESC;                              \
 832                                 *outptr++ = '.';                              \
 833                                 *outptr++ = 'F';                              \
 834                                 set2 = ISO88597_set;                          \
 835                               }                                               \
 836                                                                               \
 837                             if (__builtin_expect (outptr + 3 > outend, 0))    \
 838                               {                                               \
 839                                 result = __GCONV_FULL_OUTPUT;                 \
 840                                 break;                                        \
 841                               }                                               \
 842                             *outptr++ = ESC;                                  \
 843                             *outptr++ = 'N';                                  \
 844                             *outptr++ = res;                                  \
 845                             result = __GCONV_OK;                              \
 846                             break;                                            \
 847                           }                                                   \
 848                       }                                                       \
 849                   }                                                           \
 850                                                                               \
 851                   break;                                                      \
 852                                                                               \
 853                 case japanese:                                                \
 854                                                                               \
 855                   /* Try JIS X 0201 Roman.  */                                \
 856                   written = ucs4_to_jisx0201 (ch, buf);                       \
 857                   if (written != __UNKNOWN_10646_CHAR                         \
 858                       && buf[0] > 0x20 && buf[0] < 0x80)                      \
 859                     {                                                         \
 860                       if (set != JISX0201_Roman_set)                          \
 861                         {                                                     \
 862                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 863                             {                                                 \
 864                               result = __GCONV_FULL_OUTPUT;                   \
 865                               break;                                          \
 866                             }                                                 \
 867                           *outptr++ = ESC;                                    \
 868                           *outptr++ = '(';                                    \
 869                           *outptr++ = 'J';                                    \
 870                           set = JISX0201_Roman_set;                           \
 871                         }                                                     \
 872                                                                               \
 873                       if (__builtin_expect (outptr + 1 > outend, 0))          \
 874                         {                                                     \
 875                           result = __GCONV_FULL_OUTPUT;                       \
 876                           break;                                              \
 877                         }                                                     \
 878                       *outptr++ = buf[0];                                     \
 879                       result = __GCONV_OK;                                    \
 880                       break;                                                  \
 881                     }                                                         \
 882                                                                               \
 883                   /* Try JIS X 0208.  */                                      \
 884                   written = ucs4_to_jisx0208 (ch, buf, 2);                    \
 885                   if (written != __UNKNOWN_10646_CHAR)                        \
 886                     {                                                         \
 887                       if (set != JISX0208_1983_set)                           \
 888                         {                                                     \
 889                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 890                             {                                                 \
 891                               result = __GCONV_FULL_OUTPUT;                   \
 892                               break;                                          \
 893                             }                                                 \
 894                           *outptr++ = ESC;                                    \
 895                           *outptr++ = '$';                                    \
 896                           *outptr++ = 'B';                                    \
 897                           set = JISX0208_1983_set;                            \
 898                         }                                                     \
 899                                                                               \
 900                       if (__builtin_expect (outptr + 2 > outend, 0))          \
 901                         {                                                     \
 902                           result = __GCONV_FULL_OUTPUT;                       \
 903                           break;                                              \
 904                         }                                                     \
 905                       *outptr++ = buf[0];                                     \
 906                       *outptr++ = buf[1];                                     \
 907                       result = __GCONV_OK;                                    \
 908                       break;                                                  \
 909                     }                                                         \
 910                                                                               \
 911                   if (__builtin_expect (var == iso2022jp, 0))                 \
 912                     /* Don't use the other Japanese character sets.  */       \
 913                     break;                                                    \
 914                                                                               \
 915                   /* Try JIS X 0212.  */                                      \
 916                   written = ucs4_to_jisx0212 (ch, buf, 2);                    \
 917                   if (written != __UNKNOWN_10646_CHAR)                        \
 918                     {                                                         \
 919                       if (set != JISX0212_set)                                \
 920                         {                                                     \
 921                           if (__builtin_expect (outptr + 4 > outend, 0))      \
 922                             {                                                 \
 923                               result = __GCONV_FULL_OUTPUT;                   \
 924                               break;                                          \
 925                             }                                                 \
 926                           *outptr++ = ESC;                                    \
 927                           *outptr++ = '$';                                    \
 928                           *outptr++ = '(';                                    \
 929                           *outptr++ = 'D';                                    \
 930                           set = JISX0212_set;                                 \
 931                         }                                                     \
 932                                                                               \
 933                       if (__builtin_expect (outptr + 2 > outend, 0))          \
 934                         {                                                     \
 935                           result = __GCONV_FULL_OUTPUT;                       \
 936                           break;                                              \
 937                         }                                                     \
 938                       *outptr++ = buf[0];                                     \
 939                       *outptr++ = buf[1];                                     \
 940                       result = __GCONV_OK;                                    \
 941                       break;                                                  \
 942                     }                                                         \
 943                                                                               \
 944                   break;                                                      \
 945                                                                               \
 946                 case chinese:                                                 \
 947                   assert (var == iso2022jp2);                                 \
 948                                                                               \
 949                   /* Try GB 2312.  */                                         \
 950                   written = ucs4_to_gb2312 (ch, buf, 2);                      \
 951                   if (written != __UNKNOWN_10646_CHAR)                        \
 952                     {                                                         \
 953                       if (set != GB2312_set)                                  \
 954                         {                                                     \
 955                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 956                             {                                                 \
 957                               result = __GCONV_FULL_OUTPUT;                   \
 958                               break;                                          \
 959                             }                                                 \
 960                           *outptr++ = ESC;                                    \
 961                           *outptr++ = '$';                                    \
 962                           *outptr++ = 'A';                                    \
 963                           set = GB2312_set;                                   \
 964                         }                                                     \
 965                                                                               \
 966                       if (__builtin_expect (outptr + 2 > outend, 0))          \
 967                         {                                                     \
 968                           result = __GCONV_FULL_OUTPUT;                       \
 969                           break;                                              \
 970                         }                                                     \
 971                       *outptr++ = buf[0];                                     \
 972                       *outptr++ = buf[1];                                     \
 973                       result = __GCONV_OK;                                    \
 974                       break;                                                  \
 975                     }                                                         \
 976                                                                               \
 977                   break;                                                      \
 978                                                                               \
 979                 case korean:                                                  \
 980                   assert (var == iso2022jp2);                                 \
 981                                                                               \
 982                   /* Try KSC 5601.  */                                        \
 983                   written = ucs4_to_ksc5601 (ch, buf, 2);                     \
 984                   if (written != __UNKNOWN_10646_CHAR)                        \
 985                     {                                                         \
 986                       if (set != KSC5601_set)                                 \
 987                         {                                                     \
 988                           if (__builtin_expect (outptr + 4 > outend, 0))      \
 989                             {                                                 \
 990                               result = __GCONV_FULL_OUTPUT;                   \
 991                               break;                                          \
 992                             }                                                 \
 993                           *outptr++ = ESC;                                    \
 994                           *outptr++ = '$';                                    \
 995                           *outptr++ = '(';                                    \
 996                           *outptr++ = 'C';                                    \
 997                           set = KSC5601_set;                                  \
 998                         }                                                     \
 999                                                                               \
1000                       if (__builtin_expect (outptr + 2 > outend, 0))          \
1001                         {                                                     \
1002                           result = __GCONV_FULL_OUTPUT;                       \
1003                           break;                                              \
1004                         }                                                     \
1005                       *outptr++ = buf[0];                                     \
1006                       *outptr++ = buf[1];                                     \
1007                       result = __GCONV_OK;                                    \
1008                       break;                                                  \
1009                     }                                                         \
1010                                                                               \
1011                   break;                                                      \
1012                                                                               \
1013                 case other:                                                   \
1014                   assert (var == iso2022jp2);                                 \
1015                                                                               \
1016                   /* Try JIS X 0201 Kana.  This is not officially part        \
1017                      of ISO-2022-JP-2, according to RFC 1554.  Therefore      \
1018                      we try this only after all other attempts.  */           \
1019                   written = ucs4_to_jisx0201 (ch, buf);                       \
1020                   if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80)      \
1021                     {                                                         \
1022                       if (set != JISX0201_Kana_set)                           \
1023                         {                                                     \
1024                           if (__builtin_expect (outptr + 3 > outend, 0))      \
1025                             {                                                 \
1026                               result = __GCONV_FULL_OUTPUT;                   \
1027                               break;                                          \
1028                             }                                                 \
1029                           *outptr++ = ESC;                                    \
1030                           *outptr++ = '(';                                    \
1031                           *outptr++ = 'I';                                    \
1032                           set = JISX0201_Kana_set;                            \
1033                         }                                                     \
1034                                                                               \
1035                       if (__builtin_expect (outptr + 1 > outend, 0))          \
1036                         {                                                     \
1037                           result = __GCONV_FULL_OUTPUT;                       \
1038                           break;                                              \
1039                         }                                                     \
1040                       *outptr++ = buf[0] - 0x80;                              \
1041                       result = __GCONV_OK;                                    \
1042                       break;                                                  \
1043                     }                                                         \
1044                                                                               \
1045                   break;                                                      \
1046                                                                               \
1047                 default:                                                      \
1048                   abort ();                                                   \
1049                 }                                                             \
1050             while (result == __GCONV_ILLEGAL_INPUT                            \
1051                    && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
1052                                                                               \
1053             if (result == __GCONV_FULL_OUTPUT)                                \
1054               break;                                                          \
1055                                                                               \
1056             if (result == __GCONV_ILLEGAL_INPUT)                              \
1057               {                                                               \
1058                 STANDARD_ERR_HANDLER (4);                                     \
1059               }                                                               \
1060           }                                                                   \
1061       }                                                                       \
1062                                                                               \
1063     /* Now that we wrote the output increment the input pointer.  */          \
1064     inptr += 4;                                                               \
1065   }
/* Configuration macros for the <iconv/loop.c> template included at the
   end of this group.  NOTE(review): how loop.c consumes each macro is not
   visible here; the notes below describe only what the definitions
   themselves contain -- confirm the rest against iconv/loop.c.  */

/* Defined with an empty value, so it can only act as an on/off switch.
   NOTE(review): presumably asks loop.c to make the conversion flags
   available to the loop body, which invokes STANDARD_ERR_HANDLER --
   confirm.  */
1066 #define LOOP_NEED_FLAGS
/* Two extra declarations, `var' and `setp'.  The text starts with a comma,
   so it is evidently meant to be appended to an existing declaration list
   (presumably the loop function's parameters -- confirm).  The loop body
   compares `var' against iso2022jp / iso2022jp2; `setp' is used by
   INIT_PARAMS and UPDATE_PARAMS below.  */
1067 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
/* Split the packed state word *setp into three local ints by masking:
   `set' (CURRENT_SEL_MASK bits), `set2' (CURRENT_ASSIGN_MASK bits) and
   `tag' (CURRENT_TAG_MASK bits).  The loop body compares and reassigns
   `set' whenever it emits an escape sequence.  */
1068 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
1069                                 int set2 = *setp & CURRENT_ASSIGN_MASK;       \
1070                                 int tag = *setp & CURRENT_TAG_MASK;
/* Recombine the three fields with bitwise OR and store the result back
   through setp.  There is no trailing semicolon here; presumably the use
   site in loop.c supplies it -- confirm.  */
1071 #define UPDATE_PARAMS           *setp = set | set2 | tag
/* Expand the loop template using the definitions above.  */
1072 #include <iconv/loop.c>
1073
1074
1075 /* Now define the toplevel functions.  <iconv/skeleton.c> is driven by
        the macros set up near the top of this file (FROM_LOOP, TO_LOOP,
        MIN_NEEDED_FROM, MAX_NEEDED_FROM, ...).  DEFINE_INIT and DEFINE_FINI
        are 0 there because this module provides its own initialization
        and destructor functions.  */
1076 #include <iconv/skeleton.c>