iconvdata/iso-2022-jp.c

   1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
   2    Copyright (C) 1998-2021 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <https://www.gnu.org/licenses/>.  */
  19
  20 #include <assert.h>
  21 #include <dlfcn.h>
  22 #include <gconv.h>
  23 #include <stdint.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include "jis0201.h"
  27 #include "jis0208.h"
  28 #include "jis0212.h"
  29 #include "gb2312.h"
  30 #include "ksc5601.h"
  31
/* Range descriptor: two 16-bit bounds plus a signed 32-bit index.  It is
   declared ahead of the "iso8859-7jp.h" include; presumably the tables in
   that header are built from it -- TODO confirm against the header.  */
struct gap
{
  uint16_t start;
  uint16_t end;
  int32_t idx;
};
  38
  39 #include "iso8859-7jp.h"
  40
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b

/* We provide our own initialization and destructor function (gconv_init
   and gconv_end in this file), so the generic ones are switched off.  */
#define DEFINE_INIT     0
#define DEFINE_FINI     0

/* Definitions used in the body of the `gconv' function.

   The FROM loop reads ISO-2022-JP input: at least one byte per item and up
   to four (its BODY consumes four bytes for the longest escape sequence it
   recognizes), and it stores exactly four output bytes per character.
   The TO loop reads one four-byte character at a time; the output bounds
   of 1 and 6 bytes are taken as given here (NOTE(review): the TO loop body
   should be checked to confirm that 6 is the true maximum).  */
#define FROM_LOOP               from_iso2022jp_loop
#define TO_LOOP                 to_iso2022jp_loop
#define ONE_DIRECTION                   0
#define FROM_LOOP_MIN_NEEDED_FROM       1
#define FROM_LOOP_MAX_NEEDED_FROM       4
#define FROM_LOOP_MIN_NEEDED_TO         4
#define FROM_LOOP_MAX_NEEDED_TO         4
#define TO_LOOP_MIN_NEEDED_FROM         4
#define TO_LOOP_MAX_NEEDED_FROM         4
#define TO_LOOP_MIN_NEEDED_TO           1
#define TO_LOOP_MAX_NEEDED_TO           6
#define FROM_DIRECTION          (dir == from_iso2022jp)
/* Declarations placed ahead of the conversion loops: `dir' and `var' are
   read back from the record gconv_init stored in step->__data, `save_set'
   is the scratch slot used by SAVE_RESET_STATE, and `setp' points at the
   COUNT element of the conversion state.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir;         \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var;           \
  int save_set;                                                               \
  int *setp = &data->__statep->__count;
/* Extra arguments handed to both loop functions; they must line up with
   the EXTRA_LOOP_DECLS of each loop.  */
#define EXTRA_LOOP_ARGS         , var, setp
  67
  68
  69 /* Direction of the transformation.  */
  70 enum direction
  71 {
  72   illegal_dir,
  73   to_iso2022jp,
  74   from_iso2022jp
  75 };
  76
  77 /* We handle ISO-2022-jp and ISO-2022-JP-2 here.  */
  78 enum variant
  79 {
  80   illegal_var,
  81   iso2022jp,
  82   iso2022jp2
  83 };
  84
  85
/* Per-step record: gconv_init allocates it and stores it in step->__data,
   PREPARE_LOOP reads it back, and gconv_end frees it.  */
struct iso2022jp_data
{
  enum direction dir;	/* Conversion direction of this step.  */
  enum variant var;	/* ISO-2022-JP or ISO-2022-JP-2.  */
};
  91
  92
  93 /* The COUNT element of the state keeps track of the currently selected
  94    character set.  The possible values are:  */
  95 enum
  96 {
  97   ASCII_set = 0,
  98   JISX0208_1978_set = 1 << 3,
  99   JISX0208_1983_set = 2 << 3,
 100   JISX0201_Roman_set = 3 << 3,
 101   JISX0201_Kana_set = 4 << 3,
 102   GB2312_set = 5 << 3,
 103   KSC5601_set = 6 << 3,
 104   JISX0212_set = 7 << 3,
 105   CURRENT_SEL_MASK = 7 << 3
 106 };
 107
 108 /* The second value stored is the designation of the G2 set.  The following
 109    values are possible:  */
 110 enum
 111 {
 112   UNSPECIFIED_set = 0,
 113   ISO88591_set = 1 << 6,
 114   ISO88597_set = 2 << 6,
 115   CURRENT_ASSIGN_MASK = 3 << 6
 116 };
 117
 118 /* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
 119    describes the language tag parsing status.  The possible values are as
 120    follows.  Values >= TAG_language are temporary tag parsing states.  */
 121 enum
 122 {
 123   TAG_none = 0,
 124   TAG_language = 4 << 8,
 125   TAG_language_j = 5 << 8,
 126   TAG_language_ja = 1 << 8,
 127   TAG_language_k = 6 << 8,
 128   TAG_language_ko = 2 << 8,
 129   TAG_language_z = 7 << 8,
 130   TAG_language_zh = 3 << 8,
 131   CURRENT_TAG_MASK = 7 << 8
 132 };
 133
 134
 135 extern int gconv_init (struct __gconv_step *step);
 136 int
 137 gconv_init (struct __gconv_step *step)
 138 {
 139   /* Determine which direction.  */
 140   struct iso2022jp_data *new_data;
 141   enum direction dir = illegal_dir;
 142   enum variant var = illegal_var;
 143   int result;
 144
 145   if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
 146     {
 147       dir = from_iso2022jp;
 148       var = iso2022jp;
 149     }
 150   else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
 151     {
 152       dir = to_iso2022jp;
 153       var = iso2022jp;
 154     }
 155   else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
 156     {
 157       dir = from_iso2022jp;
 158       var = iso2022jp2;
 159     }
 160   else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
 161     {
 162       dir = to_iso2022jp;
 163       var = iso2022jp2;
 164     }
 165
 166   result = __GCONV_NOCONV;
 167   if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
 168     {
 169       new_data
 170         = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
 171
 172       result = __GCONV_NOMEM;
 173       if (new_data != NULL)
 174         {
 175           new_data->dir = dir;
 176           new_data->var = var;
 177           step->__data = new_data;
 178
 179           if (dir == from_iso2022jp)
 180             {
 181               step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
 182               step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
 183               step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
 184               step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
 185             }
 186           else
 187             {
 188               step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
 189               step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
 190               step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
 191               step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
 192             }
 193
 194           /* Yes, this is a stateful encoding.  */
 195           step->__stateful = 1;
 196
 197           result = __GCONV_OK;
 198         }
 199     }
 200
 201   return result;
 202 }
 203
 204
 205 extern void gconv_end (struct __gconv_step *data);
 206 void
 207 gconv_end (struct __gconv_step *data)
 208 {
 209   free (data->__data);
 210 }
 211
 212
 213 /* Since this is a stateful encoding we have to provide code which resets
 214    the output state to the initial state.  This has to be done during the
 215    flushing.  */
 216 #define EMIT_SHIFT_TO_INIT \
 217   /* Avoid warning about unused variable 'var'.  */                           \
 218   (void) var;                                                                 \
 219                                                                               \
 220   if ((data->__statep->__count & ~7) != ASCII_set)                            \
 221     {                                                                         \
 222       if (dir == from_iso2022jp                                               \
 223           || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set)       \
 224         {                                                                     \
 225           /* It's easy, we don't have to emit anything, we just reset the     \
 226              state for the input.  Note that this also clears the G2          \
 227              designation.  */                                                 \
 228           data->__statep->__count &= 7;                                       \
 229           data->__statep->__count |= ASCII_set;                               \
 230         }                                                                     \
 231       else                                                                    \
 232         {                                                                     \
 233           /* We are not in the initial state.  To switch back we have         \
 234              to emit the sequence `Esc ( B'.  */                              \
 235           if (__glibc_unlikely (outbuf + 3 > outend))                         \
 236             /* We don't have enough room in the output buffer.  */            \
 237             status = __GCONV_FULL_OUTPUT;                                     \
 238           else                                                                \
 239             {                                                                 \
 240               /* Write out the shift sequence.  */                            \
 241               *outbuf++ = ESC;                                                \
 242               *outbuf++ = '(';                                                \
 243               *outbuf++ = 'B';                                                \
 244               /* Note that this also clears the G2 designation.  */           \
 245               data->__statep->__count &= 7;                                   \
 246               data->__statep->__count |= ASCII_set;                           \
 247             }                                                                 \
 248         }                                                                     \
 249     }
 250
 251
 252 /* Since we might have to reset input pointer we must be able to save
 253    and retore the state.  */
 254 #define SAVE_RESET_STATE(Save) \
 255   if (Save)                                                                   \
 256     save_set = *setp;                                                         \
 257   else                                                                        \
 258     *setp = save_set
 259
 260
/* First define the conversion function from ISO-2022-JP to UCS4.  The
   macros in this group parametrize <iconv/loop.c>, which is included at
   the end of the group (presumably expanding them into the function named
   by LOOPFCT -- loop.c itself is not part of this file).  */
#define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* BODY handles one input item per pass.  An escape sequence only updates
   `set' (the selected character set) or `set2' (the G2 designation),
   advances INPTR and restarts the loop with `continue'; nothing is
   written.  Anything else is decoded into one 32-bit value which is stored
   at OUTPTR.  */
#define BODY \
  {                                                                           \
    uint32_t ch = *inptr;                                                     \
                                                                              \
    /* Recognize escape sequences.  */                                        \
    if (__builtin_expect (ch, 0) == ESC)                                      \
      {                                                                       \
        /* We now must be prepared to read two to three more                  \
           characters.  If we have a match in the first character but         \
           then the input buffer ends we terminate with an error since        \
           we must not risk missing an escape sequence just because it        \
           is not entirely in the current input buffer.  The second half      \
           of the test covers the four-byte sequences `Esc $ ( x' which       \
           only ISO-2022-JP-2 knows.  */                                      \
        if (__builtin_expect (inptr + 2 >= inend, 0)                          \
            || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '('       \
                && __builtin_expect (inptr + 3 >= inend, 0)))                 \
          {                                                                   \
            /* Not enough input available.  */                                \
            result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
          }                                                                   \
                                                                              \
        if (inptr[1] == '(')                                                  \
          {                                                                   \
            if (inptr[2] == 'B')                                              \
              {                                                               \
                /* ASCII selected.  */                                        \
                set = ASCII_set;                                              \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (inptr[2] == 'J')                                         \
              {                                                               \
                /* JIS X 0201 Roman selected.  */                             \
                set = JISX0201_Roman_set;                                     \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (var == iso2022jp2 && inptr[2] == 'I')                    \
              {                                                               \
                /* JIS X 0201 Kana selected.  */                              \
                set = JISX0201_Kana_set;                                      \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
          }                                                                   \
        else if (inptr[1] == '$')                                             \
          {                                                                   \
            if (inptr[2] == '@')                                              \
              {                                                               \
                /* JIS X 0208-1978 selected.  */                              \
                set = JISX0208_1978_set;                                      \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (inptr[2] == 'B')                                         \
              {                                                               \
                /* JIS X 0208-1983 selected.  */                              \
                set = JISX0208_1983_set;                                      \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (var == iso2022jp2)                                       \
              {                                                               \
                if (inptr[2] == 'A')                                          \
                  {                                                           \
                    /* GB 2312-1980 selected.  */                             \
                    set = GB2312_set;                                         \
                    inptr += 3;                                               \
                    continue;                                                 \
                  }                                                           \
                else if (inptr[2] == '(')                                     \
                  {                                                           \
                    /* inptr[3] is readable here: the length check above      \
                       already left the loop otherwise.  */                   \
                    if (inptr[3] == 'C')                                      \
                      {                                                       \
                        /* KSC 5601-1987 selected.  */                        \
                        set = KSC5601_set;                                    \
                        inptr += 4;                                           \
                        continue;                                             \
                      }                                                       \
                    else if (inptr[3] == 'D')                                 \
                      {                                                       \
                        /* JIS X 0212-1990 selected.  */                      \
                        set = JISX0212_set;                                   \
                        inptr += 4;                                           \
                        continue;                                             \
                      }                                                       \
                  }                                                           \
              }                                                               \
          }                                                                   \
        else if (var == iso2022jp2 && inptr[1] == '.')                        \
          {                                                                   \
            if (inptr[2] == 'A')                                              \
              {                                                               \
                /* ISO 8859-1-GR selected.  */                                \
                set2 = ISO88591_set;                                          \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
            else if (inptr[2] == 'F')                                         \
              {                                                               \
                /* ISO 8859-7-GR selected.  */                                \
                set2 = ISO88597_set;                                          \
                inptr += 3;                                                   \
                continue;                                                     \
              }                                                               \
          }                                                                   \
      }                                                                       \
                                                                              \
    /* `Esc N x' takes the single character X from the G2 set designated      \
       earlier.  Reading inptr[1] and inptr[2] is safe because the length     \
       check above already left the loop if fewer than three bytes were       \
       available after an ESC.  */                                            \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N')                    \
      {                                                                       \
        if (set2 == ISO88591_set)                                             \
          {                                                                   \
            /* NOTE(review): unlike the ISO 8859-7 branch below, the byte     \
               is not range-checked before the high bit is set.  */           \
            ch = inptr[2] | 0x80;                                             \
            inptr += 3;                                                       \
          }                                                                   \
        else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)       \
          {                                                                   \
            /* We use the table from the ISO 8859-7 module.  It is indexed    \
               from 0x20; a zero entry means "no mapping".  */                \
            if (inptr[2] < 0x20 || inptr[2] >= 0x80)                          \
              STANDARD_FROM_LOOP_ERR_HANDLER (1);                             \
            ch = iso88597_to_ucs4[inptr[2] - 0x20];                           \
            if (ch == 0)                                                      \
              STANDARD_FROM_LOOP_ERR_HANDLER (3);                             \
            inptr += 3;                                                       \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* Single shift without any G2 designation.  */                   \
            STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
          }                                                                   \
      }                                                                       \
    else if (ch >= 0x80)                                                      \
      {                                                                       \
        /* Bytes with the high bit set are rejected.  */                      \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))                   \
      /* Almost done, just advance the input pointer.  Control characters,    \
         space and DEL take this path whatever set is selected; that          \
         includes an ESC which did not start a recognized sequence.  */       \
      ++inptr;                                                                \
    else if (set == JISX0201_Roman_set)                                       \
      {                                                                       \
        /* Use the JIS X 0201 table.  */                                      \
        ch = jisx0201_to_ucs4 (ch);                                           \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))                    \
          STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
        ++inptr;                                                              \
      }                                                                       \
    else if (set == JISX0201_Kana_set)                                        \
      {                                                                       \
        /* Use the JIS X 0201 table; the lookup is done with the high bit     \
           set.  */                                                           \
        ch = jisx0201_to_ucs4 (ch + 0x80);                                    \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))                    \
          STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Multi-byte sets.  The helpers receive the address of INPTR and     \
           the number of remaining bytes (presumably they advance INPTR       \
           themselves on success -- confirm in the table headers).  */        \
        if (set == JISX0208_1978_set || set == JISX0208_1983_set)             \
          /* XXX I don't have the tables for these two old variants of        \
             JIS X 0208.  Therefore I'm using the tables for JIS X            \
             0208-1990.  If somebody has problems with this please            \
             provide the appropriate tables.  */                              \
          ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);                   \
        else if (set == JISX0212_set)                                         \
          /* Use the JIS X 0212 table.  */                                    \
          ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0);                   \
        else if (set == GB2312_set)                                           \
          /* Use the GB 2312 table.  */                                       \
          ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);                     \
        else                                                                  \
          {                                                                   \
            /* Every other value of `set' was handled above.  */              \
            assert (set == KSC5601_set);                                      \
                                                                              \
            /* Use the KSC 5601 table.  */                                    \
            ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);                  \
          }                                                                   \
                                                                              \
        /* A zero result is treated as "input ends inside a character".  */   \
        if (__glibc_unlikely (ch == 0))                                       \
          {                                                                   \
            result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
          }                                                                   \
        else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))               \
          {                                                                   \
            STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
          }                                                                   \
      }                                                                       \
                                                                              \
    /* Store the decoded character.  */                                       \
    put32 (outptr, ch);                                                       \
    outptr += 4;                                                              \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS        , enum variant var, int *setp
/* `set' and `set2' are unpacked from *SETP when the loop starts and are
   written back by UPDATE_PARAMS.  Note that the value written back consists
   of these two fields only.  */
#define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
                                int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS           *setp = set | set2
#include <iconv/loop.c>
 462
 463
 464 /* Next, define the other direction.  */
 465
 466 enum conversion { none = 0, european, japanese, chinese, korean, other };
 467
 468 /* A datatype for conversion lists.  */
 469 typedef unsigned int cvlist_t;
 470 #define CVLIST(cv1, cv2, cv3, cv4, cv5) \
 471   ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
 472 #define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
 473 #define CVLIST_REST(cvl) ((cvl) >> 3)
 474 static const cvlist_t conversion_lists[4] =
 475   {
 476     /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
 477     /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
 478     /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
 479     /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
 480   };
 481
 482 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
 483 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
 484 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
 485 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
 486 #define LOOPFCT                 TO_LOOP
 487 #define BODY \
 488   {                                                                           \
 489     uint32_t ch;                                                              \
 490     size_t written;                                                           \
 491                                                                               \
 492     ch = get32 (inptr);                                                       \
 493                                                                               \
 494     if (var == iso2022jp2)                                                    \
 495       {                                                                       \
 496         /* Handle Unicode tag characters (range U+E0000..U+E007F).  */        \
 497         if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7)))                   \
 498           {                                                                   \
 499             ch &= 0x7f;                                                       \
 500             if (ch >= 'A' && ch <= 'Z')                                       \
 501               ch += 'a' - 'A';                                                \
 502             if (ch == 0x01)                                                   \
 503               tag = TAG_language;                                             \
 504             else if (ch == 'j' && tag == TAG_language)                        \
 505               tag = TAG_language_j;                                           \
 506             else if (ch == 'a' && tag == TAG_language_j)                      \
 507               tag = TAG_language_ja;                                          \
 508             else if (ch == 'k' && tag == TAG_language)                        \
 509               tag = TAG_language_k;                                           \
 510             else if (ch == 'o' && tag == TAG_language_k)                      \
 511               tag = TAG_language_ko;                                          \
 512             else if (ch == 'z' && tag == TAG_language)                        \
 513               tag = TAG_language_z;                                           \
 514             else if (ch == 'h' && tag == TAG_language_z)                      \
 515               tag = TAG_language_zh;                                          \
 516             else if (ch == 0x7f)                                              \
 517               tag = TAG_none;                                                 \
 518             else                                                              \
 519               {                                                               \
 520                 /* Other tag characters reset the tag parsing state (if the   \
 521                    current state is a temporary state) or are ignored (if     \
 522                    the current state is a stable one).  */                    \
 523                 if (tag >= TAG_language)                                      \
 524                   tag = TAG_none;                                             \
 525               }                                                               \
 526                                                                               \
 527             inptr += 4;                                                       \
 528             continue;                                                         \
 529           }                                                                   \
 530                                                                               \
 531         /* Non-tag characters reset the tag parsing state, if the current     \
 532            state is a temporary state.  */                                    \
 533         if (__glibc_unlikely (tag >= TAG_language))                           \
 534           tag = TAG_none;                                                     \
 535       }                                                                       \
 536                                                                               \
 537     /* First see whether we can write the character using the currently       \
 538        selected character set.  But ignore the selected character set if      \
 539        the current language tag shows different preferences.  */              \
 540     if (set == ASCII_set)                                                     \
 541       {                                                                       \
 542         /* Please note that the NUL byte is *not* matched if we are not       \
 543            currently using the ASCII charset.  This is because we must        \
 544            switch to the initial state whenever a NUL byte is written.  */    \
 545         if (ch <= 0x7f)                                                       \
 546           {                                                                   \
 547             *outptr++ = ch;                                                   \
 548             written = 1;                                                      \
 549                                                                               \
 550             /* At the beginning of a line, G2 designation is cleared.  */     \
 551             if (var == iso2022jp2 && ch == 0x0a)                              \
 552               set2 = UNSPECIFIED_set;                                         \
 553           }                                                                   \
 554         else                                                                  \
 555           written = __UNKNOWN_10646_CHAR;                                     \
 556       }                                                                       \
 557     /* ISO-2022-JP recommends to encode the newline character always in       \
 558        ASCII since this allows a context-free interpretation of the           \
 559        characters at the beginning of the next line.  Otherwise it would      \
 560        have to be known whether the last line ended using ASCII or            \
 561        JIS X 0201.  */                                                        \
 562     else if (set == JISX0201_Roman_set                                        \
 563              && (__builtin_expect (tag == TAG_none, 1)                        \
 564                  || tag == TAG_language_ja))                                  \
 565       {                                                                       \
 566         unsigned char buf[1];                                                 \
 567         written = ucs4_to_jisx0201 (ch, buf);                                 \
 568         if (written != __UNKNOWN_10646_CHAR)                                  \
 569           {                                                                   \
 570             if (buf[0] > 0x20 && buf[0] < 0x80)                               \
 571               {                                                               \
 572                 *outptr++ = buf[0];                                           \
 573                 written = 1;                                                  \
 574               }                                                               \
 575             else                                                              \
 576               written = __UNKNOWN_10646_CHAR;                                 \
 577           }                                                                   \
 578       }                                                                       \
 579     else if (set == JISX0201_Kana_set                                         \
 580              && (__builtin_expect (tag == TAG_none, 1)                        \
 581                  || tag == TAG_language_ja))                                  \
 582       {                                                                       \
 583         unsigned char buf[1];                                                 \
 584         written = ucs4_to_jisx0201 (ch, buf);                                 \
 585         if (written != __UNKNOWN_10646_CHAR)                                  \
 586           {                                                                   \
 587             if (buf[0] > 0xa0 && buf[0] < 0xe0)                               \
 588               {                                                               \
 589                 *outptr++ = buf[0] - 0x80;                                    \
 590                 written = 1;                                                  \
 591               }                                                               \
 592             else                                                              \
 593               written = __UNKNOWN_10646_CHAR;                                 \
 594           }                                                                   \
 595       }                                                                       \
 596     else                                                                      \
 597       {                                                                       \
 598         if ((set == JISX0208_1978_set || set == JISX0208_1983_set)            \
 599             && (__builtin_expect (tag == TAG_none, 1)                         \
 600                 || tag == TAG_language_ja))                                   \
 601           written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);           \
 602         else if (set == JISX0212_set                                          \
 603                  && (__builtin_expect (tag == TAG_none, 1)                    \
 604                      || tag == TAG_language_ja))                              \
 605           written = ucs4_to_jisx0212 (ch, outptr, outend - outptr);           \
 606         else if (set == GB2312_set                                            \
 607                  && (__builtin_expect (tag == TAG_none, 1)                    \
 608                      || tag == TAG_language_zh))                              \
 609           written = ucs4_to_gb2312 (ch, outptr, outend - outptr);             \
 610         else if (set == KSC5601_set                                           \
 611                  && (__builtin_expect (tag == TAG_none, 1)                    \
 612                      || tag == TAG_language_ko))                              \
 613           written = ucs4_to_ksc5601 (ch, outptr, outend - outptr);            \
 614         else                                                                  \
 615           written = __UNKNOWN_10646_CHAR;                                     \
 616                                                                               \
 617         if (__glibc_unlikely (written == 0))                                  \
 618           {                                                                   \
 619             result = __GCONV_FULL_OUTPUT;                                     \
 620             break;                                                            \
 621           }                                                                   \
 622         else if (written != __UNKNOWN_10646_CHAR)                             \
 623           outptr += written;                                                  \
 624       }                                                                       \
 625                                                                               \
 626     if (written == __UNKNOWN_10646_CHAR                                       \
 627         && __builtin_expect (tag == TAG_none, 1))                             \
 628       {                                                                       \
 629         if (set2 == ISO88591_set)                                             \
 630           {                                                                   \
 631             if (ch >= 0x80 && ch <= 0xff)                                     \
 632               {                                                               \
 633                 if (__glibc_unlikely (outptr + 3 > outend))                   \
 634                   {                                                           \
 635                     result = __GCONV_FULL_OUTPUT;                             \
 636                     break;                                                    \
 637                   }                                                           \
 638                                                                               \
 639                 *outptr++ = ESC;                                              \
 640                 *outptr++ = 'N';                                              \
 641                 *outptr++ = ch & 0x7f;                                        \
 642                 written = 3;                                                  \
 643               }                                                               \
 644           }                                                                   \
 645         else if (set2 == ISO88597_set)                                        \
 646           {                                                                   \
 647             if (__glibc_likely (ch < 0xffff))                                 \
 648               {                                                               \
 649                 const struct gap *rp = from_idx;                              \
 650                                                                               \
 651                 while (ch > rp->end)                                          \
 652                   ++rp;                                                       \
 653                 if (ch >= rp->start)                                          \
 654                   {                                                           \
 655                     unsigned char res =                                       \
 656                       iso88597_from_ucs4[ch - 0xa0 + rp->idx];                \
 657                     if (res != '\0')                                          \
 658                       {                                                       \
 659                         if (__glibc_unlikely (outptr + 3 > outend))           \
 660                           {                                                   \
 661                             result = __GCONV_FULL_OUTPUT;                     \
 662                             break;                                            \
 663                           }                                                   \
 664                                                                               \
 665                         *outptr++ = ESC;                                      \
 666                         *outptr++ = 'N';                                      \
 667                         *outptr++ = res & 0x7f;                               \
 668                         written = 3;                                          \
 669                       }                                                       \
 670                   }                                                           \
 671               }                                                               \
 672           }                                                                   \
 673       }                                                                       \
 674                                                                               \
 675     if (written == __UNKNOWN_10646_CHAR)                                      \
 676       {                                                                       \
 677         /* The attempts to use the currently selected character set           \
 678            failed, either because the language tag changed, or because        \
 679            the character requires a different character set, or because       \
 680            the character is unknown.                                          \
 681            The CJK character sets partially overlap when seen as subsets      \
 682            of ISO 10646; therefore there is no single correct result.         \
 683            We use a preference order which depends on the language tag.  */   \
 684                                                                               \
 685         if (ch <= 0x7f)                                                       \
 686           {                                                                   \
 687             /* We must encode using ASCII.  First write out the               \
 688                escape sequence.  */                                           \
 689             if (__glibc_unlikely (outptr + 3 > outend))                       \
 690               {                                                               \
 691                 result = __GCONV_FULL_OUTPUT;                                 \
 692                 break;                                                        \
 693               }                                                               \
 694                                                                               \
 695             *outptr++ = ESC;                                                  \
 696             *outptr++ = '(';                                                  \
 697             *outptr++ = 'B';                                                  \
 698             set = ASCII_set;                                                  \
 699                                                                               \
 700             if (__glibc_unlikely (outptr + 1 > outend))                       \
 701               {                                                               \
 702                 result = __GCONV_FULL_OUTPUT;                                 \
 703                 break;                                                        \
 704               }                                                               \
 705             *outptr++ = ch;                                                   \
 706                                                                               \
 707             /* At the beginning of a line, G2 designation is cleared.  */     \
 708             if (var == iso2022jp2 && ch == 0x0a)                              \
 709               set2 = UNSPECIFIED_set;                                         \
 710           }                                                                   \
 711         else                                                                  \
 712           {                                                                   \
 713             /* Now it becomes difficult.  We must search the other            \
 714                character sets one by one.  Use an ordered conversion          \
 715                list that depends on the current language tag.  */             \
 716             cvlist_t conversion_list;                                         \
 717             unsigned char buf[2];                                             \
 718             int res = __GCONV_ILLEGAL_INPUT;                                  \
 719                                                                               \
 720             if (var == iso2022jp2)                                            \
 721               conversion_list = conversion_lists[tag >> 8];                   \
 722             else                                                              \
 723               conversion_list = CVLIST (japanese, 0, 0, 0, 0);                \
 724                                                                               \
 725             do                                                                \
 726               switch (CVLIST_FIRST (conversion_list))                         \
 727                 {                                                             \
 728                 case european:                                                \
 729                                                                               \
 730                   /* Try ISO 8859-1 upper half.   */                          \
 731                   if (ch >= 0x80 && ch <= 0xff)                               \
 732                     {                                                         \
 733                       if (set2 != ISO88591_set)                               \
 734                         {                                                     \
 735                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 736                             {                                                 \
 737                               res = __GCONV_FULL_OUTPUT;                      \
 738                               break;                                          \
 739                             }                                                 \
 740                           *outptr++ = ESC;                                    \
 741                           *outptr++ = '.';                                    \
 742                           *outptr++ = 'A';                                    \
 743                           set2 = ISO88591_set;                                \
 744                         }                                                     \
 745                                                                               \
 746                       if (__glibc_unlikely (outptr + 3 > outend))             \
 747                         {                                                     \
 748                           res = __GCONV_FULL_OUTPUT;                          \
 749                           break;                                              \
 750                         }                                                     \
 751                       *outptr++ = ESC;                                        \
 752                       *outptr++ = 'N';                                        \
 753                       *outptr++ = ch - 0x80;                                  \
 754                       res = __GCONV_OK;                                       \
 755                       break;                                                  \
 756                     }                                                         \
 757                                                                               \
 758                   /* Try ISO 8859-7 upper half.  */                           \
 759                   if (__glibc_likely (ch < 0xffff))                           \
 760                     {                                                         \
 761                       const struct gap *rp = from_idx;                        \
 762                                                                               \
 763                       while (ch > rp->end)                                    \
 764                         ++rp;                                                 \
 765                       if (ch >= rp->start)                                    \
 766                         {                                                     \
 767                           unsigned char ch2 =                                 \
 768                             iso88597_from_ucs4[ch - 0xa0 + rp->idx];          \
 769                           if (ch2 != '\0')                                    \
 770                             {                                                 \
 771                               if (set2 != ISO88597_set)                       \
 772                                 {                                             \
 773                                   if (__builtin_expect (outptr + 3 > outend,  \
 774                                                         0))                   \
 775                                     {                                         \
 776                                       res = __GCONV_FULL_OUTPUT;              \
 777                                       break;                                  \
 778                                     }                                         \
 779                                   *outptr++ = ESC;                            \
 780                                   *outptr++ = '.';                            \
 781                                   *outptr++ = 'F';                            \
 782                                   set2 = ISO88597_set;                        \
 783                                 }                                             \
 784                                                                               \
 785                               if (__builtin_expect (outptr + 3 > outend, 0))  \
 786                                 {                                             \
 787                                   res = __GCONV_FULL_OUTPUT;                  \
 788                                   break;                                      \
 789                                 }                                             \
 790                               *outptr++ = ESC;                                \
 791                               *outptr++ = 'N';                                \
 792                               *outptr++ = ch2 - 0x80;                         \
 793                               res = __GCONV_OK;                               \
 794                               break;                                          \
 795                             }                                                 \
 796                         }                                                     \
 797                     }                                                         \
 798                                                                               \
 799                   break;                                                      \
 800                                                                               \
 801                 case japanese:                                                \
 802                                                                               \
 803                   /* Try JIS X 0201 Roman.  */                                \
 804                   written = ucs4_to_jisx0201 (ch, buf);                       \
 805                   if (written != __UNKNOWN_10646_CHAR                         \
 806                       && buf[0] > 0x20 && buf[0] < 0x80)                      \
 807                     {                                                         \
 808                       if (set != JISX0201_Roman_set)                          \
 809                         {                                                     \
 810                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 811                             {                                                 \
 812                               res = __GCONV_FULL_OUTPUT;                      \
 813                               break;                                          \
 814                             }                                                 \
 815                           *outptr++ = ESC;                                    \
 816                           *outptr++ = '(';                                    \
 817                           *outptr++ = 'J';                                    \
 818                           set = JISX0201_Roman_set;                           \
 819                         }                                                     \
 820                                                                               \
 821                       if (__glibc_unlikely (outptr + 1 > outend))             \
 822                         {                                                     \
 823                           res = __GCONV_FULL_OUTPUT;                          \
 824                           break;                                              \
 825                         }                                                     \
 826                       *outptr++ = buf[0];                                     \
 827                       res = __GCONV_OK;                                       \
 828                       break;                                                  \
 829                     }                                                         \
 830                                                                               \
 831                   /* Try JIS X 0208.  */                                      \
 832                   written = ucs4_to_jisx0208 (ch, buf, 2);                    \
 833                   if (written != __UNKNOWN_10646_CHAR)                        \
 834                     {                                                         \
 835                       if (set != JISX0208_1983_set)                           \
 836                         {                                                     \
 837                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 838                             {                                                 \
 839                               res = __GCONV_FULL_OUTPUT;                      \
 840                               break;                                          \
 841                             }                                                 \
 842                           *outptr++ = ESC;                                    \
 843                           *outptr++ = '$';                                    \
 844                           *outptr++ = 'B';                                    \
 845                           set = JISX0208_1983_set;                            \
 846                         }                                                     \
 847                                                                               \
 848                       if (__glibc_unlikely (outptr + 2 > outend))             \
 849                         {                                                     \
 850                           res = __GCONV_FULL_OUTPUT;                          \
 851                           break;                                              \
 852                         }                                                     \
 853                       *outptr++ = buf[0];                                     \
 854                       *outptr++ = buf[1];                                     \
 855                       res = __GCONV_OK;                                       \
 856                       break;                                                  \
 857                     }                                                         \
 858                                                                               \
 859                   if (__glibc_unlikely (var == iso2022jp))                    \
 860                     /* Don't use the other Japanese character sets.  */       \
 861                     break;                                                    \
 862                                                                               \
 863                   /* Try JIS X 0212.  */                                      \
 864                   written = ucs4_to_jisx0212 (ch, buf, 2);                    \
 865                   if (written != __UNKNOWN_10646_CHAR)                        \
 866                     {                                                         \
 867                       if (set != JISX0212_set)                                \
 868                         {                                                     \
 869                           if (__builtin_expect (outptr + 4 > outend, 0))      \
 870                             {                                                 \
 871                               res = __GCONV_FULL_OUTPUT;                      \
 872                               break;                                          \
 873                             }                                                 \
 874                           *outptr++ = ESC;                                    \
 875                           *outptr++ = '$';                                    \
 876                           *outptr++ = '(';                                    \
 877                           *outptr++ = 'D';                                    \
 878                           set = JISX0212_set;                                 \
 879                         }                                                     \
 880                                                                               \
 881                       if (__glibc_unlikely (outptr + 2 > outend))             \
 882                         {                                                     \
 883                           res = __GCONV_FULL_OUTPUT;                          \
 884                           break;                                              \
 885                         }                                                     \
 886                       *outptr++ = buf[0];                                     \
 887                       *outptr++ = buf[1];                                     \
 888                       res = __GCONV_OK;                                       \
 889                       break;                                                  \
 890                     }                                                         \
 891                                                                               \
 892                   break;                                                      \
 893                                                                               \
 894                 case chinese:                                                 \
 895                   assert (var == iso2022jp2);                                 \
 896                                                                               \
 897                   /* Try GB 2312.  */                                         \
 898                   written = ucs4_to_gb2312 (ch, buf, 2);                      \
 899                   if (written != __UNKNOWN_10646_CHAR)                        \
 900                     {                                                         \
 901                       if (set != GB2312_set)                                  \
 902                         {                                                     \
 903                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 904                             {                                                 \
 905                               res = __GCONV_FULL_OUTPUT;                      \
 906                               break;                                          \
 907                             }                                                 \
 908                           *outptr++ = ESC;                                    \
 909                           *outptr++ = '$';                                    \
 910                           *outptr++ = 'A';                                    \
 911                           set = GB2312_set;                                   \
 912                         }                                                     \
 913                                                                               \
 914                       if (__glibc_unlikely (outptr + 2 > outend))             \
 915                         {                                                     \
 916                           res = __GCONV_FULL_OUTPUT;                          \
 917                           break;                                              \
 918                         }                                                     \
 919                       *outptr++ = buf[0];                                     \
 920                       *outptr++ = buf[1];                                     \
 921                       res = __GCONV_OK;                                       \
 922                       break;                                                  \
 923                     }                                                         \
 924                                                                               \
 925                   break;                                                      \
 926                                                                               \
 927                 case korean:                                                  \
 928                   assert (var == iso2022jp2);                                 \
 929                                                                               \
 930                   /* Try KSC 5601.  */                                        \
 931                   written = ucs4_to_ksc5601 (ch, buf, 2);                     \
 932                   if (written != __UNKNOWN_10646_CHAR)                        \
 933                     {                                                         \
 934                       if (set != KSC5601_set)                                 \
 935                         {                                                     \
 936                           if (__builtin_expect (outptr + 4 > outend, 0))      \
 937                             {                                                 \
 938                               res = __GCONV_FULL_OUTPUT;                      \
 939                               break;                                          \
 940                             }                                                 \
 941                           *outptr++ = ESC;                                    \
 942                           *outptr++ = '$';                                    \
 943                           *outptr++ = '(';                                    \
 944                           *outptr++ = 'C';                                    \
 945                           set = KSC5601_set;                                  \
 946                         }                                                     \
 947                                                                               \
 948                       if (__glibc_unlikely (outptr + 2 > outend))             \
 949                         {                                                     \
 950                           res = __GCONV_FULL_OUTPUT;                          \
 951                           break;                                              \
 952                         }                                                     \
 953                       *outptr++ = buf[0];                                     \
 954                       *outptr++ = buf[1];                                     \
 955                       res = __GCONV_OK;                                       \
 956                       break;                                                  \
 957                     }                                                         \
 958                                                                               \
 959                   break;                                                      \
 960                                                                               \
 961                 case other:                                                   \
 962                   assert (var == iso2022jp2);                                 \
 963                                                                               \
 964                   /* Try JIS X 0201 Kana.  This is not officially part        \
 965                      of ISO-2022-JP-2, according to RFC 1554.  Therefore      \
 966                      we try this only after all other attempts.  */           \
 967                   written = ucs4_to_jisx0201 (ch, buf);                       \
 968                   if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80)      \
 969                     {                                                         \
 970                       if (set != JISX0201_Kana_set)                           \
 971                         {                                                     \
 972                           if (__builtin_expect (outptr + 3 > outend, 0))      \
 973                             {                                                 \
 974                               res = __GCONV_FULL_OUTPUT;                      \
 975                               break;                                          \
 976                             }                                                 \
 977                           *outptr++ = ESC;                                    \
 978                           *outptr++ = '(';                                    \
 979                           *outptr++ = 'I';                                    \
 980                           set = JISX0201_Kana_set;                            \
 981                         }                                                     \
 982                                                                               \
 983                       if (__glibc_unlikely (outptr + 1 > outend))             \
 984                         {                                                     \
 985                           res = __GCONV_FULL_OUTPUT;                          \
 986                           break;                                              \
 987                         }                                                     \
 988                       *outptr++ = buf[0] - 0x80;                              \
 989                       res = __GCONV_OK;                                       \
 990                       break;                                                  \
 991                     }                                                         \
 992                                                                               \
 993                   break;                                                      \
 994                                                                               \
 995                 default:                                                      \
 996                   abort ();                                                   \
 997                 }                                                             \
 998             while (res == __GCONV_ILLEGAL_INPUT                               \
 999                    && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
1000                                                                               \
1001             if (res == __GCONV_FULL_OUTPUT)                                   \
1002               {                                                               \
1003                 result = res;                                                 \
1004                 break;                                                        \
1005               }                                                               \
1006                                                                               \
1007             if (res == __GCONV_ILLEGAL_INPUT)                                 \
1008               {                                                               \
1009                 STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
1010               }                                                               \
1011           }                                                                   \
1012       }                                                                       \
1013                                                                               \
1014     /* Now that we wrote the output increment the input pointer.  */          \
1015     inptr += 4;                                                               \
1016   }
1017 #define LOOP_NEED_FLAGS
     /* LOOP_NEED_FLAGS (above) and the macros that follow configure the
        code pulled in by the #include of <iconv/loop.c> right after them.
        NOTE(review): LOOP_NEED_FLAGS presumably makes the conversion flags
        available inside the loop body so the error-handler macros can
        consult them -- confirm against loop.c.  */
     /* Extra parameters of the loop function; the leading comma appends
        them to an existing parameter list.  VAR is the encoding variant
        being converted, SETP points to the integer holding the packed
        state that INIT_PARAMS unpacks and UPDATE_PARAMS writes back.  */
1018 #define EXTRA_LOOP_DECLS        , enum variant var, int *setp
     /* Declare the working copies of the state: SET, SET2 and TAG are the
        bits of *SETP selected by CURRENT_SEL_MASK, CURRENT_ASSIGN_MASK and
        CURRENT_TAG_MASK respectively.  The loop body reads and modifies
        these locals rather than *SETP itself.  */
1019 #define INIT_PARAMS             int set = *setp & CURRENT_SEL_MASK;           \
1020                                 int set2 = *setp & CURRENT_ASSIGN_MASK;       \
1021                                 int tag = *setp & CURRENT_TAG_MASK;
     /* Reload SET, SET2 and TAG from *SETP, discarding whatever the locals
        currently hold.  Wrapped in do ... while (0) so it can be used as a
        single statement.  NOTE(review): presumably invoked by loop.c when
        a conversion step has to be redone -- confirm against loop.c.  */
1022 #define REINIT_PARAMS           do                                            \
1023                                   {                                           \
1024                                     set = *setp & CURRENT_SEL_MASK;           \
1025                                     set2 = *setp & CURRENT_ASSIGN_MASK;       \
1026                                     tag = *setp & CURRENT_TAG_MASK;           \
1027                                   }                                           \
1028                                 while (0)
     /* Store the working copies back into *SETP by OR-ing them together.
        NOTE(review): this assumes the three masks select disjoint bits --
        verify against the mask definitions earlier in the file.  */
1029 #define UPDATE_PARAMS           *setp = set | set2 | tag
1030 #include <iconv/loop.c>
1031
1032
1033 /* Now define the toplevel functions.  */
1034 #include <iconv/skeleton.c>