iconvdata/iso-2022-jp.c

   1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
   2    Copyright (C) 1998-2023 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #include <assert.h>
  20 #include <dlfcn.h>
  21 #include <gconv.h>
  22 #include <stdint.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include "jis0201.h"
  26 #include "jis0208.h"
  27 #include "jis0212.h"
  28 #include "gb2312.h"
  29 #include "ksc5601.h"
  30
  31 struct gap
  32 {
       /* Record made of two uint16_t fields (start, end) and an int32_t
          idx.  Nothing in the visible part of this file reads it;
          presumably it is the element type of lookup data in
          "iso8859-7jp.h", which is included right after this definition
          -- TODO confirm against that header.  */
  33   uint16_t start;
  34   uint16_t end;
  35   int32_t idx;
  36 };
  37
  38 #include "iso8859-7jp.h"
  39
  40 /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
  41 #define ESC 0x1b
  42
  43 /* We provide our own initialization and destructor function (gconv_init
        and gconv_end in this file).  */
  44 #define DEFINE_INIT     0
  45 #define DEFINE_FINI     0
  46
  47 /* Definitions used in the body of the `gconv' function.
        FROM_LOOP/TO_LOOP name the two loop functions generated further down
        in this file.  The *_NEEDED_* values are the per-step byte limits
        that gconv_init copies into the step: decoding consumes 1 to 4 input
        bytes (the longest escape sequence the decoder recognises is four
        bytes) and stores 4 output bytes; encoding reads 4 input bytes and
        is declared to emit between 1 and 6 output bytes.  */
  48 #define FROM_LOOP               from_iso2022jp_loop
  49 #define TO_LOOP                 to_iso2022jp_loop
  50 #define ONE_DIRECTION                   0
  51 #define FROM_LOOP_MIN_NEEDED_FROM       1
  52 #define FROM_LOOP_MAX_NEEDED_FROM       4
  53 #define FROM_LOOP_MIN_NEEDED_TO         4
  54 #define FROM_LOOP_MAX_NEEDED_TO         4
  55 #define TO_LOOP_MIN_NEEDED_FROM         4
  56 #define TO_LOOP_MAX_NEEDED_FROM         4
  57 #define TO_LOOP_MIN_NEEDED_TO           1
  58 #define TO_LOOP_MAX_NEEDED_TO           6
     /* True when this step decodes ISO-2022-JP; `dir' comes from
        PREPARE_LOOP.  */
  59 #define FROM_DIRECTION          (dir == from_iso2022jp)
     /* Declares the locals the rest of the macros in this file rely on:
        `dir' and `var' are read from the struct iso2022jp_data stored in
        step->__data, `save_set' is the scratch slot used by
        SAVE_RESET_STATE, and `setp' points at the __count member of the
        conversion state.  */
  60 #define PREPARE_LOOP \
  61   enum direction dir = ((struct iso2022jp_data *) step->__data)->dir;         \
  62   enum variant var = ((struct iso2022jp_data *) step->__data)->var;           \
  63   int save_set;                                                               \
  64   int *setp = &data->__statep->__count;
     /* Extra arguments for the loop functions; they line up with the
        parameters declared by EXTRA_LOOP_DECLS (enum variant var,
        int *setp).  */
  65 #define EXTRA_LOOP_ARGS         , var, setp
  66
  67
  68 /* Direction of the transformation, chosen by gconv_init from the step's
        charset names.  */
  69 enum direction
  70 {
  71   illegal_dir,              /* Neither name matched; no conversion.  */
  72   to_iso2022jp,             /* step->__to_name is ISO-2022-JP(-2).  */
  73   from_iso2022jp            /* step->__from_name is ISO-2022-JP(-2).  */
  74 };
  75
  76 /* We handle ISO-2022-JP and ISO-2022-JP-2 here.  */
  77 enum variant
  78 {
  79   illegal_var,              /* Initial value before a name has matched.  */
  80   iso2022jp,                /* Name matched "ISO-2022-JP//".  */
  81   iso2022jp2                /* Name matched "ISO-2022-JP-2//".  */
  82 };
  83
  84
  85 struct iso2022jp_data
  86 {
       /* Per-step private data: allocated and filled in by gconv_init,
          stored in step->__data, read back by PREPARE_LOOP and released by
          gconv_end.  */
  87   enum direction dir;
  88   enum variant var;
  89 };
  90
  91
  92 /* The COUNT element of the state keeps track of the currently selected
  93    character set.  The selection occupies bits 3..5 of __count
        (CURRENT_SEL_MASK).  The escape sequences noted next to each value
        are the ones the decoder in this file recognises; those marked JP-2
        are accepted only for the ISO-2022-JP-2 variant.  The possible
        values are:  */
  94 enum
  95 {
  96   ASCII_set = 0,                    /* ESC ( B; also the initial state.  */
  97   JISX0208_1978_set = 1 << 3,       /* ESC $ @  */
  98   JISX0208_1983_set = 2 << 3,       /* ESC $ B  */
  99   JISX0201_Roman_set = 3 << 3,      /* ESC ( J  */
 100   JISX0201_Kana_set = 4 << 3,       /* ESC ( I  (JP-2)  */
 101   GB2312_set = 5 << 3,              /* ESC $ A  (JP-2)  */
 102   KSC5601_set = 6 << 3,             /* ESC $ ( C  (JP-2)  */
 103   JISX0212_set = 7 << 3,            /* ESC $ ( D  (JP-2)  */
 104   CURRENT_SEL_MASK = 7 << 3         /* Mask for the field above.  */
 105 };
 106
 107 /* The second value stored is the designation of the G2 set.  It occupies
        bits 6..7 of __count (CURRENT_ASSIGN_MASK); the decoder changes it
        only for the ISO-2022-JP-2 variant.  The following
 108    values are possible:  */
 109 enum
 110 {
 111   UNSPECIFIED_set = 0,              /* No G2 set designated.  */
 112   ISO88591_set = 1 << 6,            /* ESC . A  */
 113   ISO88597_set = 2 << 6,            /* ESC . F  */
 114   CURRENT_ASSIGN_MASK = 3 << 6      /* Mask for the field above.  */
 115 };
 116
 117 /* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
 118    describes the language tag parsing status.  It occupies bits 8..10
        (CURRENT_TAG_MASK).  The stable states (none, ja, ko, zh) are
        numbered 0..3 before shifting, so they compare below every temporary
        state.  Tag letters are matched case-insensitively by the encoder.
        The possible values are as
 119    follows.  Values >= TAG_language are temporary tag parsing states.  */
 120 enum
 121 {
 122   TAG_none = 0,                 /* No language tag; also set by U+E007F.  */
 123   TAG_language = 4 << 8,        /* Temporary: U+E0001 seen.  */
 124   TAG_language_j = 5 << 8,      /* Temporary: tag `j' seen.  */
 125   TAG_language_ja = 1 << 8,     /* Stable: tag `ja'.  */
 126   TAG_language_k = 6 << 8,      /* Temporary: tag `k' seen.  */
 127   TAG_language_ko = 2 << 8,     /* Stable: tag `ko'.  */
 128   TAG_language_z = 7 << 8,      /* Temporary: tag `z' seen.  */
 129   TAG_language_zh = 3 << 8,     /* Stable: tag `zh'.  */
 130   CURRENT_TAG_MASK = 7 << 8     /* Mask for the field above.  */
 131 };
 132
 133
 134 extern int gconv_init (struct __gconv_step *step);
     /* Module initialization hook.  Compares STEP->__from_name and
        STEP->__to_name, ignoring case, against "ISO-2022-JP//" and
        "ISO-2022-JP-2//" to choose direction and variant, stores both in a
        newly malloc'ed struct iso2022jp_data attached to STEP->__data, and
        fills in the step's min/max byte requirements and stateful flag.

        Returns __GCONV_OK on success, __GCONV_NOCONV when no name matches
        (STEP is left untouched), and __GCONV_NOMEM when malloc fails.  */
 135 int
 136 gconv_init (struct __gconv_step *step)
 137 {
 138   /* Determine which direction.  */
 139   struct iso2022jp_data *new_data;
 140   enum direction dir = illegal_dir;
 141   enum variant var = illegal_var;
 142   int result;
 143
       /* For each variant the source name is tested before the target
          name.  */
 144   if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
 145     {
 146       dir = from_iso2022jp;
 147       var = iso2022jp;
 148     }
 149   else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
 150     {
 151       dir = to_iso2022jp;
 152       var = iso2022jp;
 153     }
 154   else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
 155     {
 156       dir = from_iso2022jp;
 157       var = iso2022jp2;
 158     }
 159   else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
 160     {
 161       dir = to_iso2022jp;
 162       var = iso2022jp2;
 163     }
 164
 165   result = __GCONV_NOCONV;
       /* __builtin_expect only supplies a branch hint (DIR is usually
          from_iso2022jp); the condition itself is dir != illegal_dir.  */
 166   if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
 167     {
 168       new_data
 169         = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
 170
           /* Pessimistically assume failure; overwritten below once the
              allocation is known to have succeeded.  */
 171       result = __GCONV_NOMEM;
 172       if (new_data != NULL)
 173         {
 174           new_data->dir = dir;
 175           new_data->var = var;
 176           step->__data = new_data;
 177
               /* Publish the byte limits that match the chosen direction.  */
 178           if (dir == from_iso2022jp)
 179             {
 180               step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
 181               step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
 182               step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
 183               step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
 184             }
 185           else
 186             {
 187               step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
 188               step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
 189               step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
 190               step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
 191             }
 192
 193           /* Yes, this is a stateful encoding.  */
 194           step->__stateful = 1;
 195
 196           result = __GCONV_OK;
 197         }
 198     }
 199
 200   return result;
 201 }
 202
 203
 204 extern void gconv_end (struct __gconv_step *data);
     /* Module destructor hook: frees whatever DATA->__data points to, which
        is the struct iso2022jp_data allocated by gconv_init.  The pointer
        itself is not cleared.  */
 205 void
 206 gconv_end (struct __gconv_step *data)
 207 {
 208   free (data->__data);
 209 }
 210
 211
 212 /* Since this is a stateful encoding we have to provide code which resets
 213    the output state to the initial state.  This has to be done during the
 214    flushing.

        The expansion refers to `data', `dir', `var', `outbuf', `outend' and
        `status', which must be in scope where the macro is used.  The low
        three bits of __count are never interpreted here and are always
        preserved (`& ~7' on the test, `&= 7' on the reset).  Nothing is
        done when every other bit is already zero.  Otherwise the state is
        reset silently when decoding, or when encoding with ASCII already
        selected (only G2 or tag bits are set).  In the remaining case the
        three bytes `Esc ( B' are written first, or STATUS is set to
        __GCONV_FULL_OUTPUT and the state is left alone if fewer than three
        output bytes are available.  */
 215 #define EMIT_SHIFT_TO_INIT \
 216   /* Avoid warning about unused variable 'var'.  */                           \
 217   (void) var;                                                                 \
 218                                                                               \
 219   if ((data->__statep->__count & ~7) != ASCII_set)                            \
 220     {                                                                         \
 221       if (dir == from_iso2022jp                                               \
 222           || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set)       \
 223         {                                                                     \
 224           /* It's easy, we don't have to emit anything, we just reset the     \
 225              state for the input.  Note that this also clears the G2          \
 226              designation.  */                                                 \
 227           data->__statep->__count &= 7;                                       \
 228           data->__statep->__count |= ASCII_set;                               \
 229         }                                                                     \
 230       else                                                                    \
 231         {                                                                     \
 232           /* We are not in the initial state.  To switch back we have         \
 233              to emit the sequence `Esc ( B'.  */                              \
 234           if (__glibc_unlikely (outbuf + 3 > outend))                         \
 235             /* We don't have enough room in the output buffer.  */            \
 236             status = __GCONV_FULL_OUTPUT;                                     \
 237           else                                                                \
 238             {                                                                 \
 239               /* Write out the shift sequence.  */                            \
 240               *outbuf++ = ESC;                                                \
 241               *outbuf++ = '(';                                                \
 242               *outbuf++ = 'B';                                                \
 243               /* Note that this also clears the G2 designation.  */           \
 244               data->__statep->__count &= 7;                                   \
 245               data->__statep->__count |= ASCII_set;                           \
 246             }                                                                 \
 247         }                                                                     \
 248     }
 249
 250
 251 /* Since we might have to reset input pointer we must be able to save
 252    and restore the state.  With a nonzero SAVE the current *setp is
        copied into save_set; otherwise *setp is reloaded from save_set.
        Both variables are declared by PREPARE_LOOP.  The expansion ends
        without a trailing semicolon.  */
 253 #define SAVE_RESET_STATE(Save) \
 254   if (Save)                                                                   \
 255     save_set = *setp;                                                         \
 256   else                                                                        \
 257     *setp = save_set
 258
 259
 260 /* First define the conversion function from ISO-2022-JP to UCS4.
        Each pass through BODY either consumes one designation escape
        sequence (updating `set' or `set2' and restarting with `continue'),
        or decodes a single character into `ch' and stores it with put32 as
        four output bytes.  */
 261 #define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
 262 #define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
 263 #define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
 264 #define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
 265 #define LOOPFCT                 FROM_LOOP
 266 #define BODY \
 267   {                                                                           \
 268     uint32_t ch = *inptr;                                                     \
 269                                                                               \
 270     /* Recognize escape sequences.  */                                        \
 271     if (__builtin_expect (ch, 0) == ESC)                                      \
 272       {                                                                       \
 273         /* We now must be prepared to read two to three more                  \
 274            characters.  If we have a match in the first character but         \
 275            then the input buffer ends we terminate with an error since        \
 276            we must not risk missing an escape sequence just because it        \
 277            is not entirely in the current input buffer.  The `||'             \
                short-circuits, so inptr[1] and inptr[2] are only read once        \
                three bytes are known to be available; a fourth byte is            \
                demanded only for the ISO-2022-JP-2 `ESC $ (' prefix.  */          \
 278         if (__builtin_expect (inptr + 2 >= inend, 0)                          \
 279             || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '('       \
 280                 && __builtin_expect (inptr + 3 >= inend, 0)))                 \
 281           {                                                                   \
 282             /* Not enough input available.  */                                \
 283             result = __GCONV_INCOMPLETE_INPUT;                                \
 284             break;                                                            \
 285           }                                                                   \
 286                                                                               \
 287         if (inptr[1] == '(')                                                  \
 288           {                                                                   \
 289             if (inptr[2] == 'B')                                              \
 290               {                                                               \
 291                 /* ASCII selected.  */                                        \
 292                 set = ASCII_set;                                              \
 293                 inptr += 3;                                                   \
 294                 continue;                                                     \
 295               }                                                               \
 296             else if (inptr[2] == 'J')                                         \
 297               {                                                               \
 298                 /* JIS X 0201 Roman selected.  */                             \
 299                 set = JISX0201_Roman_set;                                     \
 300                 inptr += 3;                                                   \
 301                 continue;                                                     \
 302               }                                                               \
 303             else if (var == iso2022jp2 && inptr[2] == 'I')                    \
 304               {                                                               \
 305                 /* JIS X 0201 Kana selected.  */                              \
 306                 set = JISX0201_Kana_set;                                      \
 307                 inptr += 3;                                                   \
 308                 continue;                                                     \
 309               }                                                               \
 310           }                                                                   \
 311         else if (inptr[1] == '$')                                             \
 312           {                                                                   \
 313             if (inptr[2] == '@')                                              \
 314               {                                                               \
 315                 /* JIS X 0208-1978 selected.  */                              \
 316                 set = JISX0208_1978_set;                                      \
 317                 inptr += 3;                                                   \
 318                 continue;                                                     \
 319               }                                                               \
 320             else if (inptr[2] == 'B')                                         \
 321               {                                                               \
 322                 /* JIS X 0208-1983 selected.  */                              \
 323                 set = JISX0208_1983_set;                                      \
 324                 inptr += 3;                                                   \
 325                 continue;                                                     \
 326               }                                                               \
 327             else if (var == iso2022jp2)                                       \
 328               {                                                               \
 329                 if (inptr[2] == 'A')                                          \
 330                   {                                                           \
 331                     /* GB 2312-1980 selected.  */                             \
 332                     set = GB2312_set;                                         \
 333                     inptr += 3;                                               \
 334                     continue;                                                 \
 335                   }                                                           \
 336                 else if (inptr[2] == '(')                                     \
 337                   {                                                           \
 338                     if (inptr[3] == 'C')                                      \
 339                       {                                                       \
 340                         /* KSC 5601-1987 selected.  */                        \
 341                         set = KSC5601_set;                                    \
 342                         inptr += 4;                                           \
 343                         continue;                                             \
 344                       }                                                       \
 345                     else if (inptr[3] == 'D')                                 \
 346                       {                                                       \
 347                         /* JIS X 0212-1990 selected.  */                      \
 348                         set = JISX0212_set;                                   \
 349                         inptr += 4;                                           \
 350                         continue;                                             \
 351                       }                                                       \
 352                   }                                                           \
 353               }                                                               \
 354           }                                                                   \
 355         else if (var == iso2022jp2 && inptr[1] == '.')                        \
 356           {                                                                   \
 357             if (inptr[2] == 'A')                                              \
 358               {                                                               \
 359                 /* ISO 8859-1-GR selected.  */                                \
 360                 set2 = ISO88591_set;                                          \
 361                 inptr += 3;                                                   \
 362                 continue;                                                     \
 363               }                                                               \
 364             else if (inptr[2] == 'F')                                         \
 365               {                                                               \
 366                 /* ISO 8859-7-GR selected.  */                                \
 367                 set2 = ISO88597_set;                                          \
 368                 inptr += 3;                                                   \
 369                 continue;                                                     \
 370               }                                                               \
 371           }                                                                   \
 372       }                                                                       \
 373                                                                               \
          /* An ESC that reaches this point did not start a designation            \
             sequence.  For ISO-2022-JP-2, `ESC N' decodes the following byte      \
             through the G2 set recorded in set2.  Any other ESC falls through     \
             to the branches below, where it is copied as a control character      \
             (0x1b < 0x21).  */                                                    \
 374     if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N')                    \
 375       {                                                                       \
 376         if (set2 == ISO88591_set)                                             \
 377           {                                                                   \
                  /* NOTE(review): inptr[2] is not range-checked on this path,     \
                     unlike the ISO 8859-7 path below -- confirm whether bytes     \
                     outside 0x20..0x7f should be rejected here as well.  */       \
 378             ch = inptr[2] | 0x80;                                             \
 379             inptr += 3;                                                       \
 380           }                                                                   \
 381         else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)       \
 382           {                                                                   \
 383             /* We use the table from the ISO 8859-7 module.  */               \
 384             if (inptr[2] < 0x20 || inptr[2] >= 0x80)                          \
 385               STANDARD_FROM_LOOP_ERR_HANDLER (1);                             \
 386             ch = iso88597_to_ucs4[inptr[2] - 0x20];                           \
 387             if (ch == 0)                                                      \
 388               STANDARD_FROM_LOOP_ERR_HANDLER (3);                             \
 389             inptr += 3;                                                       \
 390           }                                                                   \
 391         else                                                                  \
 392           {                                                                   \
                  /* `ESC N' without any G2 designation in effect.  */             \
 393             STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
 394           }                                                                   \
 395       }                                                                       \
 396     else if (ch >= 0x80)                                                      \
 397       {                                                                       \
              /* Bytes with the high bit set are never accepted.  */               \
 398         STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
 399       }                                                                       \
 400     else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))                   \
 401       /* Almost done, just advance the input pointer.  In ASCII, and for      \
               control characters, space and DEL in every set, the byte value      \
               is used unchanged as the output character.  */                      \
 402       ++inptr;                                                                \
 403     else if (set == JISX0201_Roman_set)                                       \
 404       {                                                                       \
 405         /* Use the JIS X 0201 table.  */                                      \
 406         ch = jisx0201_to_ucs4 (ch);                                           \
 407         if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))                    \
 408           STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
 409         ++inptr;                                                              \
 410       }                                                                       \
 411     else if (set == JISX0201_Kana_set)                                        \
 412       {                                                                       \
 413         /* Use the JIS X 0201 table; the 7-bit byte is offset by 0x80         \
                before the lookup.  */                                             \
 414         ch = jisx0201_to_ucs4 (ch + 0x80);                                    \
 415         if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))                    \
 416           STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
 417         ++inptr;                                                              \
 418       }                                                                       \
 419     else                                                                      \
 420       {                                                                       \
              /* The remaining sets go through table helpers that receive          \
                 &inptr and the number of remaining input bytes.  This branch      \
                 never advances inptr itself, so presumably the helpers do on      \
                 success -- confirm in the table headers.  */                      \
 421         if (set == JISX0208_1978_set || set == JISX0208_1983_set)             \
 422           /* XXX I don't have the tables for these two old variants of        \
 423              JIS X 0208.  Therefore I'm using the tables for JIS X            \
 424              0208-1990.  If somebody has problems with this please            \
 425              provide the appropriate tables.  */                              \
 426           ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);                   \
 427         else if (set == JISX0212_set)                                         \
 428           /* Use the JIS X 0212 table.  */                                    \
 429           ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0);                   \
 430         else if (set == GB2312_set)                                           \
 431           /* Use the GB 2312 table.  */                                       \
 432           ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);                     \
 433         else                                                                  \
 434           {                                                                   \
 435             assert (set == KSC5601_set);                                      \
 436                                                                               \
 437             /* Use the KSC 5601 table.  */                                    \
 438             ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);                  \
 439           }                                                                   \
 440                                                                               \
              /* A zero result is treated here as "not enough input bytes for      \
                 this character".  */                                              \
 441         if (__glibc_unlikely (ch == 0))                                       \
 442           {                                                                   \
 443             result = __GCONV_INCOMPLETE_INPUT;                                \
 444             break;                                                            \
 445           }                                                                   \
 446         else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))               \
 447           {                                                                   \
 448             STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
 449           }                                                                   \
 450       }                                                                       \
 451                                                                               \
 452     put32 (outptr, ch);                                                       \
 453     outptr += 4;                                                              \
 454   }
 455 #define LOOP_NEED_FLAGS
 456 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
     /* On entry *setp is split into the selected set (`set') and the G2
        designation (`set2'); UPDATE_PARAMS stores exactly those two fields
        back, so every other bit of *setp becomes zero.  */
 457 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
 458                                 int set2 = *setp & CURRENT_ASSIGN_MASK
 459 #define UPDATE_PARAMS           *setp = set | set2
 460 #include <iconv/loop.c>
 461
 462
 463 /* Next, define the other direction.  */
 464
 465 enum conversion { none = 0, european, japanese, chinese, korean, other };
     /* The enumerators above are the entries of a conversion list.  Each
        value fits in three bits, and `none' is 0, which is also what
        CVLIST_FIRST yields once CVLIST_REST has shifted every entry out.  */
 466
 467 /* A datatype for conversion lists.  CVLIST packs five `enum conversion'
        values into one integer, three bits each, with CV1 in the lowest
        bits.  CVLIST_FIRST extracts the lowest entry and CVLIST_REST drops
        it by shifting right by three bits.  */
 468 typedef unsigned int cvlist_t;
 469 #define CVLIST(cv1, cv2, cv3, cv4, cv5) \
 470   ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
 471 #define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
 472 #define CVLIST_REST(cvl) ((cvl) >> 3)
     /* One list per stable language tag, in the order TAG_none, ja, ko, zh
        (whose enum values are 0..3 shifted left by 8).  Presumably indexed
        by `tag >> 8'; the lookup itself is outside this excerpt -- TODO
        confirm.  */
 473 static const cvlist_t conversion_lists[4] =
 474   {
 475     /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
 476     /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
 477     /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
 478     /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
 479   };
 480
 481 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
 482 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
 483 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
 484 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
 485 #define LOOPFCT                 TO_LOOP
 486 #define BODY \
 487   {                                                                           \
 488     uint32_t ch;                                                              \
 489     size_t written;                                                           \
 490                                                                               \
 491     ch = get32 (inptr);                                                       \
 492                                                                               \
 493     if (var == iso2022jp2)                                                    \
 494       {                                                                       \
 495         /* Handle Unicode tag characters (range U+E0000..U+E007F).  */        \
 496         if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7)))                   \
 497           {                                                                   \
 498             ch &= 0x7f;                                                       \
 499             if (ch >= 'A' && ch <= 'Z')                                       \
 500               ch += 'a' - 'A';                                                \
 501             if (ch == 0x01)                                                   \
 502               tag = TAG_language;                                             \
 503             else if (ch == 'j' && tag == TAG_language)                        \
 504               tag = TAG_language_j;                                           \
 505             else if (ch == 'a' && tag == TAG_language_j)                      \
 506               tag = TAG_language_ja;                                          \
 507             else if (ch == 'k' && tag == TAG_language)                        \
 508               tag = TAG_language_k;                                           \
 509             else if (ch == 'o' && tag == TAG_language_k)                      \
 510               tag = TAG_language_ko;                                          \
 511             else if (ch == 'z' && tag == TAG_language)                        \
 512               tag = TAG_language_z;                                           \
 513             else if (ch == 'h' && tag == TAG_language_z)                      \
 514               tag = TAG_language_zh;                                          \
 515             else if (ch == 0x7f)                                              \
 516               tag = TAG_none;                                                 \
 517             else                                                              \
 518               {                                                               \
 519                 /* Other tag characters reset the tag parsing state (if the   \
 520                    current state is a temporary state) or are ignored (if     \
 521                    the current state is a stable one).  */                    \
 522                 if (tag >= TAG_language)                                      \
 523                   tag = TAG_none;                                             \
 524               }                                                               \
 525                                                                               \
 526             inptr += 4;                                                       \
 527             continue;                                                         \
 528           }                                                                   \
 529                                                                               \
 530         /* Non-tag characters reset the tag parsing state, if the current     \
 531            state is a temporary state.  */                                    \
 532         if (__glibc_unlikely (tag >= TAG_language))                           \
 533           tag = TAG_none;                                                     \
 534       }                                                                       \
 535                                                                               \
 536     /* First see whether we can write the character using the currently       \
 537        selected character set.  But ignore the selected character set if      \
 538        the current language tag shows different preferences.  */              \
 539     if (set == ASCII_set)                                                     \
 540       {                                                                       \
 541         /* Please note that the NUL byte is *not* matched if we are not       \
 542            currently using the ASCII charset.  This is because we must        \
 543            switch to the initial state whenever a NUL byte is written.  */    \
 544         if (ch <= 0x7f)                                                       \
 545           {                                                                   \
 546             *outptr++ = ch;                                                   \
 547             written = 1;                                                      \
 548                                                                               \
 549             /* At the beginning of a line, G2 designation is cleared.  */     \
 550             if (var == iso2022jp2 && ch == 0x0a)                              \
 551               set2 = UNSPECIFIED_set;                                         \
 552           }                                                                   \
 553         else                                                                  \
 554           written = __UNKNOWN_10646_CHAR;                                     \
 555       }                                                                       \
 556     /* ISO-2022-JP recommends to encode the newline character always in       \
 557        ASCII since this allows a context-free interpretation of the           \
 558        characters at the beginning of the next line.  Otherwise it would      \
 559        have to be known whether the last line ended using ASCII or            \
 560        JIS X 0201.  */                                                        \
 561     else if (set == JISX0201_Roman_set                                        \
 562              && (__builtin_expect (tag == TAG_none, 1)                        \
 563                  || tag == TAG_language_ja))                                  \
 564       {                                                                       \
 565         unsigned char buf[1];                                                 \
 566         written = ucs4_to_jisx0201 (ch, buf);                                 \
 567         if (written != __UNKNOWN_10646_CHAR)                                  \
 568           {                                                                   \
 569             if (buf[0] > 0x20 && buf[0] < 0x80)                               \
 570               {                                                               \
 571                 *outptr++ = buf[0];                                           \
 572                 written = 1;                                                  \
 573               }                                                               \
 574             else                                                              \
 575               written = __UNKNOWN_10646_CHAR;                                 \
 576           }                                                                   \
 577       }                                                                       \
 578     else if (set == JISX0201_Kana_set                                         \
 579              && (__builtin_expect (tag == TAG_none, 1)                        \
 580                  || tag == TAG_language_ja))                                  \
 581       {                                                                       \
 582         unsigned char buf[1];                                                 \
 583         written = ucs4_to_jisx0201 (ch, buf);                                 \
 584         if (written != __UNKNOWN_10646_CHAR)                                  \
 585           {                                                                   \
 586             if (buf[0] > 0xa0 && buf[0] < 0xe0)                               \
 587               {                                                               \
 588                 *outptr++ = buf[0] - 0x80;                                    \
 589                 written = 1;                                                  \
 590               }                                                               \
 591             else                                                              \
 592               written = __UNKNOWN_10646_CHAR;                                 \
 593           }                                                                   \
 594       }                                                                       \
 595     else                                                                      \
 596       {                                                                       \
 597         if ((set == JISX0208_1978_set || set == JISX0208_1983_set)            \
 598             && (__builtin_expect (tag == TAG_none, 1)                         \
 599                 || tag == TAG_language_ja))                                   \
 600           written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);           \
 601         else if (set == JISX0212_set                                          \
 602                  && (__builtin_expect (tag == TAG_none, 1)                    \
 603                      || tag == TAG_language_ja))                              \
 604           written = ucs4_to_jisx0212 (ch, outptr, outend - outptr);           \
 605         else if (set == GB2312_set                                            \
 606                  && (__builtin_expect (tag == TAG_none, 1)                    \
 607                      || tag == TAG_language_zh))                              \
 608           written = ucs4_to_gb2312 (ch, outptr, outend - outptr);             \
 609         else if (set == KSC5601_set                                           \
 610                  && (__builtin_expect (tag == TAG_none, 1)                    \
 611                      || tag == TAG_language_ko))                              \
 612           written = ucs4_to_ksc5601 (ch, outptr, outend - outptr);            \
 613         else                                                                  \
 614           written = __UNKNOWN_10646_CHAR;                                     \
 615                                                                               \
 616         if (__glibc_unlikely (written == 0))                                  \
 617           {                                                                   \
 618             result = __GCONV_FULL_OUTPUT;                                     \
 619             break;                                                            \
 620           }                                                                   \
 621         else if (written != __UNKNOWN_10646_CHAR)                             \
 622           outptr += written;                                                  \
 623       }                                                                       \
 624                                                                               \
 625     if (written == __UNKNOWN_10646_CHAR                                       \
 626         && __builtin_expect (tag == TAG_none, 1))                             \
 627       {                                                                       \
 628         if (set2 == ISO88591_set)                                             \
 629           {                                                                   \
 630             if (ch >= 0x80 && ch <= 0xff)                                     \
 631               {                                                               \
 632                 if (__glibc_unlikely (outptr + 3 > outend))                   \
 633                   {                                                           \
 634                     result = __GCONV_FULL_OUTPUT;                             \
 635                     break;                                                    \
 636                   }                                                           \
 637                                                                               \
 638                 *outptr++ = ESC;                                              \
 639                 *outptr++ = 'N';                                              \
 640                 *outptr++ = ch & 0x7f;                                        \
 641                 written = 3;                                                  \
 642               }                                                               \
 643           }                                                                   \
 644         else if (set2 == ISO88597_set)                                        \
 645           {                                                                   \
 646             if (__glibc_likely (ch < 0xffff))                                 \
 647               {                                                               \
 648                 const struct gap *rp = from_idx;                              \
 649                                                                               \
 650                 while (ch > rp->end)                                          \
 651                   ++rp;                                                       \
 652                 if (ch >= rp->start)                                          \
 653                   {                                                           \
 654                     unsigned char res =                                       \
 655                       iso88597_from_ucs4[ch - 0xa0 + rp->idx];                \
 656                     if (res != '\0')                                          \
 657                       {                                                       \
 658                         if (__glibc_unlikely (outptr + 3 > outend))           \
 659                           {                                                   \
 660                             result = __GCONV_FULL_OUTPUT;                     \
 661                             break;                                            \
 662                           }                                                   \
 663                                                                               \
 664                         *outptr++ = ESC;                                      \
 665                         *outptr++ = 'N';                                      \
 666                         *outptr++ = res & 0x7f;                               \
 667                         written = 3;                                          \
 668                       }                                                       \
 669                   }                                                           \
 670               }                                                               \
 671           }                                                                   \
 672       }                                                                       \
 673                                                                               \
 674     if (written == __UNKNOWN_10646_CHAR)                                      \
 675       {                                                                       \
 676         /* The attempts to use the currently selected character set           \
 677            failed, either because the language tag changed, or because        \
 678            the character requires a different character set, or because       \
 679            the character is unknown.                                          \
 680            The CJK character sets partially overlap when seen as subsets      \
 681            of ISO 10646; therefore there is no single correct result.         \
 682            We use a preference order which depends on the language tag.  */   \
 683                                                                               \
 684         if (ch <= 0x7f)                                                       \
 685           {                                                                   \
 686             /* We must encode using ASCII.  First write out the               \
 687                escape sequence.  */                                           \
 688             if (__glibc_unlikely (outptr + 3 > outend))                       \
 689               {                                                               \
 690                 result = __GCONV_FULL_OUTPUT;                                 \
 691                 break;                                                        \
 692               }                                                               \
 693                                                                               \
 694             *outptr++ = ESC;                                                  \
 695             *outptr++ = '(';                                                  \
 696             *outptr++ = 'B';                                                  \
 697             set = ASCII_set;                                                  \
 698                                                                               \
 699             if (__glibc_unlikely (outptr + 1 > outend))                       \
 700               {                                                               \
 701                 result = __GCONV_FULL_OUTPUT;                                 \
 702                 break;                                                        \
 703               }                                                               \
 704             *outptr++ = ch;                                                   \
 705                                                                               \
 706             /* At the beginning of a line, G2 designation is cleared.  */     \
 707             if (var == iso2022jp2 && ch == 0x0a)                              \
 708               set2 = UNSPECIFIED_set;                                         \
 709           }                                                                   \
 710         else                                                                  \
 711           {                                                                   \
 712             /* Now it becomes difficult.  We must search the other            \
 713                character sets one by one.  Use an ordered conversion          \
 714                list that depends on the current language tag.  */             \
 715             cvlist_t conversion_list;                                         \
 716             unsigned char buf[2];                                             \
 717             int res = __GCONV_ILLEGAL_INPUT;                                  \
 718                                                                               \
 719             if (var == iso2022jp2)                                            \
 720               conversion_list = conversion_lists[tag >> 8];                   \
 721             else                                                              \
 722               conversion_list = CVLIST (japanese, 0, 0, 0, 0);                \
 723                                                                               \
 724             do                                                                \
 725               switch (CVLIST_FIRST (conversion_list))                         \
 726                 {                                                             \
 727                 case european:                                                \
 728                                                                               \
 729                   /* Try ISO 8859-1 upper half.   */                          \
 730                   if (ch >= 0x80 && ch <= 0xff)                               \
 731                     {                                                         \
 732                       if (set2 != ISO88591_set)                               \
 733                         {                                                     \
 734                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 735                             {                                                 \
 736                               res = __GCONV_FULL_OUTPUT;                      \
 737                               break;                                          \
 738                             }                                                 \
 739                           *outptr++ = ESC;                                    \
 740                           *outptr++ = '.';                                    \
 741                           *outptr++ = 'A';                                    \
 742                           set2 = ISO88591_set;                                \
 743                         }                                                     \
 744                                                                               \
 745                       if (__glibc_unlikely (outptr + 3 > outend))             \
 746                         {                                                     \
 747                           res = __GCONV_FULL_OUTPUT;                          \
 748                           break;                                              \
 749                         }                                                     \
 750                       *outptr++ = ESC;                                        \
 751                       *outptr++ = 'N';                                        \
 752                       *outptr++ = ch - 0x80;                                  \
 753                       res = __GCONV_OK;                                       \
 754                       break;                                                  \
 755                     }                                                         \
 756                                                                               \
 757                   /* Try ISO 8859-7 upper half.  */                           \
 758                   if (__glibc_likely (ch < 0xffff))                           \
 759                     {                                                         \
 760                       const struct gap *rp = from_idx;                        \
 761                                                                               \
 762                       while (ch > rp->end)                                    \
 763                         ++rp;                                                 \
 764                       if (ch >= rp->start)                                    \
 765                         {                                                     \
 766                           unsigned char ch2 =                                 \
 767                             iso88597_from_ucs4[ch - 0xa0 + rp->idx];          \
 768                           if (ch2 != '\0')                                    \
 769                             {                                                 \
 770                               if (set2 != ISO88597_set)                       \
 771                                 {                                             \
 772                                   if (__builtin_expect (outptr + 3 > outend,  \
 773                                                         0))                   \
 774                                     {                                         \
 775                                       res = __GCONV_FULL_OUTPUT;              \
 776                                       break;                                  \
 777                                     }                                         \
 778                                   *outptr++ = ESC;                            \
 779                                   *outptr++ = '.';                            \
 780                                   *outptr++ = 'F';                            \
 781                                   set2 = ISO88597_set;                        \
 782                                 }                                             \
 783                                                                               \
 784                               if (__builtin_expect (outptr + 3 > outend, 0))  \
 785                                 {                                             \
 786                                   res = __GCONV_FULL_OUTPUT;                  \
 787                                   break;                                      \
 788                                 }                                             \
 789                               *outptr++ = ESC;                                \
 790                               *outptr++ = 'N';                                \
 791                               *outptr++ = ch2 - 0x80;                         \
 792                               res = __GCONV_OK;                               \
 793                               break;                                          \
 794                             }                                                 \
 795                         }                                                     \
 796                     }                                                         \
 797                                                                               \
 798                   break;                                                      \
 799                                                                               \
 800                 case japanese:                                                \
 801                                                                               \
 802                   /* Try JIS X 0201 Roman.  */                                \
 803                   written = ucs4_to_jisx0201 (ch, buf);                       \
 804                   if (written != __UNKNOWN_10646_CHAR                         \
 805                       && buf[0] > 0x20 && buf[0] < 0x80)                      \
 806                     {                                                         \
 807                       if (set != JISX0201_Roman_set)                          \
 808                         {                                                     \
 809                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 810                             {                                                 \
 811                               res = __GCONV_FULL_OUTPUT;                      \
 812                               break;                                          \
 813                             }                                                 \
 814                           *outptr++ = ESC;                                    \
 815                           *outptr++ = '(';                                    \
 816                           *outptr++ = 'J';                                    \
 817                           set = JISX0201_Roman_set;                           \
 818                         }                                                     \
 819                                                                               \
 820                       if (__glibc_unlikely (outptr + 1 > outend))             \
 821                         {                                                     \
 822                           res = __GCONV_FULL_OUTPUT;                          \
 823                           break;                                              \
 824                         }                                                     \
 825                       *outptr++ = buf[0];                                     \
 826                       res = __GCONV_OK;                                       \
 827                       break;                                                  \
 828                     }                                                         \
 829                                                                               \
 830                   /* Try JIS X 0208.  */                                      \
 831                   written = ucs4_to_jisx0208 (ch, buf, 2);                    \
 832                   if (written != __UNKNOWN_10646_CHAR)                        \
 833                     {                                                         \
 834                       if (set != JISX0208_1983_set)                           \
 835                         {                                                     \
 836                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 837                             {                                                 \
 838                               res = __GCONV_FULL_OUTPUT;                      \
 839                               break;                                          \
 840                             }                                                 \
 841                           *outptr++ = ESC;                                    \
 842                           *outptr++ = '$';                                    \
 843                           *outptr++ = 'B';                                    \
 844                           set = JISX0208_1983_set;                            \
 845                         }                                                     \
 846                                                                               \
 847                       if (__glibc_unlikely (outptr + 2 > outend))             \
 848                         {                                                     \
 849                           res = __GCONV_FULL_OUTPUT;                          \
 850                           break;                                              \
 851                         }                                                     \
 852                       *outptr++ = buf[0];                                     \
 853                       *outptr++ = buf[1];                                     \
 854                       res = __GCONV_OK;                                       \
 855                       break;                                                  \
 856                     }                                                         \
 857                                                                               \
 858                   if (__glibc_unlikely (var == iso2022jp))                    \
 859                     /* Don't use the other Japanese character sets.  */       \
 860                     break;                                                    \
 861                                                                               \
 862                   /* Try JIS X 0212.  */                                      \
 863                   written = ucs4_to_jisx0212 (ch, buf, 2);                    \
 864                   if (written != __UNKNOWN_10646_CHAR)                        \
 865                     {                                                         \
 866                       if (set != JISX0212_set)                                \
 867                         {                                                     \
 868                           if (__builtin_expect (outptr + 4 > outend, 0))      \
 869                             {                                                 \
 870                               res = __GCONV_FULL_OUTPUT;                      \
 871                               break;                                          \
 872                             }                                                 \
 873                           *outptr++ = ESC;                                    \
 874                           *outptr++ = '$';                                    \
 875                           *outptr++ = '(';                                    \
 876                           *outptr++ = 'D';                                    \
 877                           set = JISX0212_set;                                 \
 878                         }                                                     \
 879                                                                               \
 880                       if (__glibc_unlikely (outptr + 2 > outend))             \
 881                         {                                                     \
 882                           res = __GCONV_FULL_OUTPUT;                          \
 883                           break;                                              \
 884                         }                                                     \
 885                       *outptr++ = buf[0];                                     \
 886                       *outptr++ = buf[1];                                     \
 887                       res = __GCONV_OK;                                       \
 888                       break;                                                  \
 889                     }                                                         \
 890                                                                               \
 891                   break;                                                      \
 892                                                                               \
 893                 case chinese:                                                 \
 894                   assert (var == iso2022jp2);                                 \
 895                                                                               \
 896                   /* Try GB 2312.  */                                         \
 897                   written = ucs4_to_gb2312 (ch, buf, 2);                      \
 898                   if (written != __UNKNOWN_10646_CHAR)                        \
 899                     {                                                         \
 900                       if (set != GB2312_set)                                  \
 901                         {                                                     \
 902                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 903                             {                                                 \
 904                               res = __GCONV_FULL_OUTPUT;                      \
 905                               break;                                          \
 906                             }                                                 \
 907                           *outptr++ = ESC;                                    \
 908                           *outptr++ = '$';                                    \
 909                           *outptr++ = 'A';                                    \
 910                           set = GB2312_set;                                   \
 911                         }                                                     \
 912                                                                               \
 913                       if (__glibc_unlikely (outptr + 2 > outend))             \
 914                         {                                                     \
 915                           res = __GCONV_FULL_OUTPUT;                          \
 916                           break;                                              \
 917                         }                                                     \
 918                       *outptr++ = buf[0];                                     \
 919                       *outptr++ = buf[1];                                     \
 920                       res = __GCONV_OK;                                       \
 921                       break;                                                  \
 922                     }                                                         \
 923                                                                               \
 924                   break;                                                      \
 925                                                                               \
 926                 case korean:                                                  \
 927                   assert (var == iso2022jp2);                                 \
 928                                                                               \
 929                   /* Try KSC 5601.  */                                        \
 930                   written = ucs4_to_ksc5601 (ch, buf, 2);                     \
 931                   if (written != __UNKNOWN_10646_CHAR)                        \
 932                     {                                                         \
 933                       if (set != KSC5601_set)                                 \
 934                         {                                                     \
 935                           if (__builtin_expect (outptr + 4 > outend, 0))      \
 936                             {                                                 \
 937                               res = __GCONV_FULL_OUTPUT;                      \
 938                               break;                                          \
 939                             }                                                 \
 940                           *outptr++ = ESC;                                    \
 941                           *outptr++ = '$';                                    \
 942                           *outptr++ = '(';                                    \
 943                           *outptr++ = 'C';                                    \
 944                           set = KSC5601_set;                                  \
 945                         }                                                     \
 946                                                                               \
 947                       if (__glibc_unlikely (outptr + 2 > outend))             \
 948                         {                                                     \
 949                           res = __GCONV_FULL_OUTPUT;                          \
 950                           break;                                              \
 951                         }                                                     \
 952                       *outptr++ = buf[0];                                     \
 953                       *outptr++ = buf[1];                                     \
 954                       res = __GCONV_OK;                                       \
 955                       break;                                                  \
 956                     }                                                         \
 957                                                                               \
 958                   break;                                                      \
 959                                                                               \
 960                 case other:                                                   \
 961                   assert (var == iso2022jp2);                                 \
 962                                                                               \
 963                   /* Try JIS X 0201 Kana.  This is not officially part        \
 964                      of ISO-2022-JP-2, according to RFC 1554.  Therefore      \
 965                      we try this only after all other attempts.  */           \
 966                   written = ucs4_to_jisx0201 (ch, buf);                       \
 967                   if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80)      \
 968                     {                                                         \
 969                       if (set != JISX0201_Kana_set)                           \
 970                         {                                                     \
 971                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 972                             {                                                 \
 973                               res = __GCONV_FULL_OUTPUT;                      \
 974                               break;                                          \
 975                             }                                                 \
 976                           *outptr++ = ESC;                                    \
 977                           *outptr++ = '(';                                    \
 978                           *outptr++ = 'I';                                    \
 979                           set = JISX0201_Kana_set;                            \
 980                         }                                                     \
 981                                                                               \
 982                       if (__glibc_unlikely (outptr + 1 > outend))             \
 983                         {                                                     \
 984                           res = __GCONV_FULL_OUTPUT;                          \
 985                           break;                                              \
 986                         }                                                     \
 987                       *outptr++ = buf[0] - 0x80;                              \
 988                       res = __GCONV_OK;                                       \
 989                       break;                                                  \
 990                     }                                                         \
 991                                                                               \
 992                   break;                                                      \
 993                                                                               \
 994                 default:                                                      \
 995                   abort ();                                                   \
 996                 }                                                             \
 997             while (res == __GCONV_ILLEGAL_INPUT                               \
 998                    && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
 999                                                                               \
1000             if (res == __GCONV_FULL_OUTPUT)                                   \
1001               {                                                               \
1002                 result = res;                                                 \
1003                 break;                                                        \
1004               }                                                               \
1005                                                                               \
1006             if (res == __GCONV_ILLEGAL_INPUT)                                 \
1007               {                                                               \
1008                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
1009               }                                                               \
1010           }                                                                   \
1011       }                                                                       \
1012                                                                               \
1013     /* Now that we wrote the output increment the input pointer.  */          \
1014     inptr += 4;                                                               \
1015   }
1016 #define LOOP_NEED_FLAGS
     /* LOOP_NEED_FLAGS (just above) is defined with an empty expansion, so
        only its existence can matter to the <iconv/loop.c> template included
        below.  NOTE(review): presumably it asks the template to make the
        conversion flags available to the loop body -- confirm in loop.c.  */
     /* Extra parameters for the template; the leading comma means the text
        is appended to an existing parameter list.  VAR is the encoding
        variant the loop body checks (e.g. `var == iso2022jp2'), and SETP
        points to the persistent shift state read and written by the three
        macros below.  */
1017 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
     /* Declare the local working copies of the state stored in *SETP: each
        one is *SETP masked with its own CURRENT_*_MASK.  Note that the
        expansion already ends in a semicolon.  */
1018 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
1019                                 int set2 = *setp & CURRENT_ASSIGN_MASK;       \
1020                                 int tag = *setp & CURRENT_TAG_MASK;
     /* Reload the three local copies from *SETP, throwing away whatever the
        locals currently hold.  Wrapped in do ... while (0) so that it acts
        as a single statement; the user supplies the semicolon.
        NOTE(review): presumably used by the template when it has to restart
        from the saved state -- confirm against loop.c/skeleton.c.  */
1021 #define REINIT_PARAMS           do                                            \
1022                                   {                                           \
1023                                     set = *setp & CURRENT_SEL_MASK;           \
1024                                     set2 = *setp & CURRENT_ASSIGN_MASK;       \
1025                                     tag = *setp & CURRENT_TAG_MASK;           \
1026                                   }                                           \
1027                                 while (0)
     /* Write the local copies back: *SETP becomes the bitwise OR of SET,
        SET2 and TAG.  This is how changes made by the loop body (for
        instance `set = GB2312_set' after emitting an escape sequence) reach
        the persistent state.  No trailing semicolon in the expansion.  */
1028 #define UPDATE_PARAMS           *setp = set | set2 | tag
1029 #include <iconv/loop.c>
1030
1031
1032 /* Now define the toplevel functions.  */
1033 #include <iconv/skeleton.c>