Update copyright dates with scripts/update-copyrights.
[glibc.git] / iconvdata / iso-2022-jp.c
blobd4cbabc5dfefd6c2ba39c38001df2aed3cba121b
1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
2 Copyright (C) 1998-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 #include <assert.h>
21 #include <dlfcn.h>
22 #include <gconv.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include "jis0201.h"
27 #include "jis0208.h"
28 #include "jis0212.h"
29 #include "gb2312.h"
30 #include "ksc5601.h"
/* One contiguous range of UCS4 code points that has a mapping in the
   ISO 8859-7 reverse table.  The encoder walks an array of these
   (`from_idx') until `ch <= end', checks `ch >= start' and then indexes
   `iso88597_from_ucs4' with `ch - 0xa0 + idx'.
   NOTE(review): presumably this type has to be declared before
   "iso8859-7jp.h" is included because that header defines `from_idx' --
   confirm against the header.  */
struct gap
{
  uint16_t start;	/* First code point of the range.  */
  uint16_t end;		/* Last code point of the range (inclusive).  */
  int32_t idx;		/* Offset added to `ch - 0xa0' to get the table index.  */
};
39 #include "iso8859-7jp.h"
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b

/* We provide our own initialization and destructor function.  */
#define DEFINE_INIT	0
#define DEFINE_FINI	0

/* Definitions used in the body of the `gconv' function.

   The ISO-2022-JP side needs between 1 byte (a plain character) and
   4 bytes (the longest designation handled below, e.g. `Esc $ ( C') of
   input when decoding, and up to 6 bytes of output when encoding (a
   4-byte designation followed by a 2-byte character).  The other side
   is always one 4-byte UCS4 value.  */
#define FROM_LOOP			from_iso2022jp_loop
#define TO_LOOP				to_iso2022jp_loop
#define ONE_DIRECTION			0
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	4
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		4
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		6
#define FROM_DIRECTION			(dir == from_iso2022jp)
/* Locals made available to the conversion function: the direction and
   variant stored by gconv_init, the slot used by SAVE_RESET_STATE, and a
   pointer to the state word that holds the current designations.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
#define EXTRA_LOOP_ARGS		, var, setp
/* Direction of the transformation.  `illegal_dir' is the initial value
   used by gconv_init until one of the step names has been recognized.  */
enum direction
{
  illegal_dir,
  to_iso2022jp,
  from_iso2022jp
};
/* We handle ISO-2022-JP and ISO-2022-JP-2 here.  `illegal_var' is the
   initial value used by gconv_init until a variant has been recognized.  */
enum variant
{
  illegal_var,
  iso2022jp,
  iso2022jp2
};
86 struct iso2022jp_data
88 enum direction dir;
89 enum variant var;
/* The COUNT element of the state keeps track of the currently selected
   character set.  The selection lives in bits 3..5 (CURRENT_SEL_MASK);
   the three low bits are not used by these values and are preserved by
   EMIT_SHIFT_TO_INIT.  The possible values are:  */
enum
{
  ASCII_set = 0,
  JISX0208_1978_set = 1 << 3,
  JISX0208_1983_set = 2 << 3,
  JISX0201_Roman_set = 3 << 3,
  JISX0201_Kana_set = 4 << 3,
  GB2312_set = 5 << 3,
  KSC5601_set = 6 << 3,
  JISX0212_set = 7 << 3,
  CURRENT_SEL_MASK = 7 << 3
};
/* The second value stored is the designation of the G2 set.  It lives
   in bits 6..7 (CURRENT_ASSIGN_MASK) of the same state word.  The
   following values are possible:  */
enum
{
  UNSPECIFIED_set = 0,
  ISO88591_set = 1 << 6,
  ISO88597_set = 2 << 6,
  CURRENT_ASSIGN_MASK = 3 << 6
};
/* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
   describes the language tag parsing status.  It lives in bits 8..10
   (CURRENT_TAG_MASK).  The possible values are as follows.  Values
   >= TAG_language are temporary tag parsing states; the stable states
   (TAG_none and the three complete language tags) are numbered 0..3
   before shifting so that `tag >> 8' can index `conversion_lists'.  */
enum
{
  TAG_none = 0,
  TAG_language = 4 << 8,
  TAG_language_j = 5 << 8,
  TAG_language_ja = 1 << 8,
  TAG_language_k = 6 << 8,
  TAG_language_ko = 2 << 8,
  TAG_language_z = 7 << 8,
  TAG_language_zh = 3 << 8,
  CURRENT_TAG_MASK = 7 << 8
};
135 extern int gconv_init (struct __gconv_step *step);
137 gconv_init (struct __gconv_step *step)
139 /* Determine which direction. */
140 struct iso2022jp_data *new_data;
141 enum direction dir = illegal_dir;
142 enum variant var = illegal_var;
143 int result;
145 if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
147 dir = from_iso2022jp;
148 var = iso2022jp;
150 else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
152 dir = to_iso2022jp;
153 var = iso2022jp;
155 else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
157 dir = from_iso2022jp;
158 var = iso2022jp2;
160 else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
162 dir = to_iso2022jp;
163 var = iso2022jp2;
166 result = __GCONV_NOCONV;
167 if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
169 new_data
170 = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
172 result = __GCONV_NOMEM;
173 if (new_data != NULL)
175 new_data->dir = dir;
176 new_data->var = var;
177 step->__data = new_data;
179 if (dir == from_iso2022jp)
181 step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
182 step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
183 step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
184 step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
186 else
188 step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
189 step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
190 step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
191 step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
194 /* Yes, this is a stateful encoding. */
195 step->__stateful = 1;
197 result = __GCONV_OK;
201 return result;
205 extern void gconv_end (struct __gconv_step *data);
206 void
207 gconv_end (struct __gconv_step *data)
209 free (data->__data);
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The expansion uses `data', `dir', `var', `outbuf', `outend' and
   `status' from the surrounding scope.  When decoding, or when ASCII is
   already selected, only the state word is reset.  Otherwise the
   three-byte sequence `Esc ( B' is written first; if fewer than three
   bytes are free, `status' is set to __GCONV_FULL_OUTPUT and the state is
   left untouched.  The three low bits of the state word are always
   preserved.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      if (dir == from_iso2022jp \
	  || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
	{ \
	  /* It's easy, we don't have to emit anything, we just reset the \
	     state for the input.  Note that this also clears the G2 \
	     designation.  */ \
	  data->__statep->__count &= 7; \
	  data->__statep->__count |= ASCII_set; \
	} \
      else \
	{ \
	  /* We are not in the initial state.  To switch back we have \
	     to emit the sequence `Esc ( B'.  */ \
	  if (__glibc_unlikely (outbuf + 3 > outend)) \
	    /* We don't have enough room in the output buffer.  */ \
	    status = __GCONV_FULL_OUTPUT; \
	  else \
	    { \
	      /* Write out the shift sequence.  */ \
	      *outbuf++ = ESC; \
	      *outbuf++ = '('; \
	      *outbuf++ = 'B'; \
	      /* Note that this also clears the G2 designation.  */ \
	      data->__statep->__count &= 7; \
	      data->__statep->__count |= ASCII_set; \
	    } \
	} \
    }
/* Since we might have to reset input pointer we must be able to save
   and restore the state.  With a non-zero argument the current state
   word (`*setp') is copied into `save_set'; with a zero argument it is
   restored from there.  Both names come from PREPARE_LOOP.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_set = *setp; \
  else \
    *setp = save_set
261 /* First define the conversion function from ISO-2022-JP to UCS4. */
/* Overview of the loop body defined below.

   One input byte `ch' is examined per iteration.

   - If it is ESC, at least three bytes must be available (four when the
     sequence starts with `Esc $ (' in ISO-2022-JP-2); otherwise the body
     stops with __GCONV_INCOMPLETE_INPUT.  Recognized designations update
     `set' (`Esc ( B', `Esc ( J', `Esc $ @', `Esc $ B', and for
     ISO-2022-JP-2 also `Esc ( I', `Esc $ A', `Esc $ ( C', `Esc $ ( D') or
     `set2' (`Esc . A', `Esc . F', ISO-2022-JP-2 only), consume the
     sequence and continue without producing output.
   - `Esc N <byte>' (ISO-2022-JP-2 only) decodes <byte> through the G2 set
     in `set2': ISO 8859-1 sets the high bit, ISO 8859-7 uses the
     `iso88597_to_ucs4' table; with no G2 set designated it is an error.
   - Any other ESC is not an escape sequence known here; being below 0x21
     it is passed through unchanged like other control characters.
   - Bytes >= 0x80 are errors.  With ASCII selected, or for bytes below
     0x21 and for 0x7f, the byte value is the result.  Otherwise the byte
     is looked up in the table of the selected set; a result of 0 from the
     two-byte converters is reported as incomplete input and
     __UNKNOWN_10646_CHAR as an error.

   The result is stored as a 4-byte value with put32.  `set' and `set2'
   are loaded from `*setp' by INIT_PARAMS and written back by
   UPDATE_PARAMS.

   NOTE(review): the decimal number at the start of each line below and
   the absence of brace-only lines look like residue from a line-numbered
   listing -- confirm against the upstream file before building.  */
262 #define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
263 #define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
264 #define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
265 #define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
266 #define LOOPFCT FROM_LOOP
267 #define BODY \
269 uint32_t ch = *inptr; \
271 /* Recognize escape sequences. */ \
272 if (__builtin_expect (ch, 0) == ESC) \
274 /* We now must be prepared to read two to three more \
275 characters. If we have a match in the first character but \
276 then the input buffer ends we terminate with an error since \
277 we must not risk missing an escape sequence just because it \
278 is not entirely in the current input buffer. */ \
279 if (__builtin_expect (inptr + 2 >= inend, 0) \
280 || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
281 && __builtin_expect (inptr + 3 >= inend, 0))) \
283 /* Not enough input available. */ \
284 result = __GCONV_INCOMPLETE_INPUT; \
285 break; \
288 if (inptr[1] == '(') \
290 if (inptr[2] == 'B') \
292 /* ASCII selected. */ \
293 set = ASCII_set; \
294 inptr += 3; \
295 continue; \
297 else if (inptr[2] == 'J') \
299 /* JIS X 0201 selected. */ \
300 set = JISX0201_Roman_set; \
301 inptr += 3; \
302 continue; \
304 else if (var == iso2022jp2 && inptr[2] == 'I') \
306 /* JIS X 0201 selected. */ \
307 set = JISX0201_Kana_set; \
308 inptr += 3; \
309 continue; \
312 else if (inptr[1] == '$') \
314 if (inptr[2] == '@') \
316 /* JIS X 0208-1978 selected. */ \
317 set = JISX0208_1978_set; \
318 inptr += 3; \
319 continue; \
321 else if (inptr[2] == 'B') \
323 /* JIS X 0208-1983 selected. */ \
324 set = JISX0208_1983_set; \
325 inptr += 3; \
326 continue; \
328 else if (var == iso2022jp2) \
330 if (inptr[2] == 'A') \
332 /* GB 2312-1980 selected. */ \
333 set = GB2312_set; \
334 inptr += 3; \
335 continue; \
337 else if (inptr[2] == '(') \
339 if (inptr[3] == 'C') \
341 /* KSC 5601-1987 selected. */ \
342 set = KSC5601_set; \
343 inptr += 4; \
344 continue; \
346 else if (inptr[3] == 'D') \
348 /* JIS X 0212-1990 selected. */ \
349 set = JISX0212_set; \
350 inptr += 4; \
351 continue; \
356 else if (var == iso2022jp2 && inptr[1] == '.') \
358 if (inptr[2] == 'A') \
360 /* ISO 8859-1-GR selected. */ \
361 set2 = ISO88591_set; \
362 inptr += 3; \
363 continue; \
365 else if (inptr[2] == 'F') \
367 /* ISO 8859-7-GR selected. */ \
368 set2 = ISO88597_set; \
369 inptr += 3; \
370 continue; \
375 if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
377 if (set2 == ISO88591_set) \
379 ch = inptr[2] | 0x80; \
380 inptr += 3; \
382 else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
384 /* We use the table from the ISO 8859-7 module. */ \
385 if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
386 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
387 ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
388 if (ch == 0) \
389 STANDARD_FROM_LOOP_ERR_HANDLER (3); \
390 inptr += 3; \
392 else \
394 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
397 else if (ch >= 0x80) \
399 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
401 else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
402 /* Almost done, just advance the input pointer. */ \
403 ++inptr; \
404 else if (set == JISX0201_Roman_set) \
406 /* Use the JIS X 0201 table. */ \
407 ch = jisx0201_to_ucs4 (ch); \
408 if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
409 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
410 ++inptr; \
412 else if (set == JISX0201_Kana_set) \
414 /* Use the JIS X 0201 table. */ \
415 ch = jisx0201_to_ucs4 (ch + 0x80); \
416 if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
417 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
418 ++inptr; \
420 else \
422 if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
423 /* XXX I don't have the tables for these two old variants of \
424 JIS X 0208. Therefore I'm using the tables for JIS X \
425 0208-1990. If somebody has problems with this please \
426 provide the appropriate tables. */ \
427 ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
428 else if (set == JISX0212_set) \
429 /* Use the JIS X 0212 table. */ \
430 ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
431 else if (set == GB2312_set) \
432 /* Use the GB 2312 table. */ \
433 ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
434 else \
436 assert (set == KSC5601_set); \
438 /* Use the KSC 5601 table. */ \
439 ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
442 if (__glibc_unlikely (ch == 0)) \
444 result = __GCONV_INCOMPLETE_INPUT; \
445 break; \
447 else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
449 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
453 put32 (outptr, ch); \
454 outptr += 4; \
456 #define LOOP_NEED_FLAGS
457 #define EXTRA_LOOP_DECLS , enum variant var, int *setp
458 #define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
459 int set2 = *setp & CURRENT_ASSIGN_MASK
460 #define UPDATE_PARAMS *setp = set | set2
461 #include <iconv/loop.c>
/* Next, define the other direction.  */

/* Character set families the encoder can fall back to.  `none' is 0 so
   that an exhausted conversion list compares equal to 0.  */
enum conversion { none = 0, european, japanese, chinese, korean, other };

/* A datatype for conversion lists.  A list packs up to five
   `enum conversion' values into one integer, three bits each, with the
   first entry in the lowest bits.  CVLIST_FIRST extracts the head and
   CVLIST_REST drops it.  */
typedef unsigned int cvlist_t;
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
#define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
#define CVLIST_REST(cvl) ((cvl) >> 3)
/* Preference order per stable language tag, indexed by `tag >> 8'.  */
static const cvlist_t conversion_lists[4] =
  {
    /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
    /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
  };
/* Overview of the loop body defined below (UCS4 to ISO-2022-JP).

   One 4-byte input value `ch' is read with get32 per iteration.

   - For ISO-2022-JP-2, characters in U+E0000..U+E007F drive the language
     tag state kept in `tag'; they are consumed without producing output.
     Any other character resets a temporary tag parsing state to TAG_none.
   - The character is first tried in the currently selected set `set',
     but only if the current tag is TAG_none or matches that set's
     language.  If that fails and no tag is active, the designated G2 set
     in `set2' is tried via the single shift `Esc N'.
   - If the character is still unconverted: values up to 0x7f are written
     after switching to ASCII with `Esc ( B'; everything else walks a
     conversion list (the tag-dependent entry of `conversion_lists' for
     ISO-2022-JP-2, Japanese only otherwise), emitting the designation
     escape sequence when the needed set is not the current one.
   - A line feed written in ASCII clears the G2 designation for
     ISO-2022-JP-2.
   - Lack of output space yields __GCONV_FULL_OUTPUT; a character no set
     can represent goes to STANDARD_TO_LOOP_ERR_HANDLER (4).

   `set', `set2' and `tag' are loaded from `*setp' by INIT_PARAMS and
   REINIT_PARAMS and written back by UPDATE_PARAMS.

   NOTE(review): in the fallback paths a designation sequence can already
   have been written, and `set'/`set2' updated, when the following room
   check fails with __GCONV_FULL_OUTPUT.  Presumably the loop skeleton
   stores the updated state through UPDATE_PARAMS so a retry does not emit
   the sequence again -- confirm against iconv/loop.c.

   NOTE(review): the decimal number at the start of each line below and
   the absence of brace-only lines look like residue from a line-numbered
   listing -- confirm against the upstream file before building.  */
482 #define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
483 #define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
484 #define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
485 #define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
486 #define LOOPFCT TO_LOOP
487 #define BODY \
489 uint32_t ch; \
490 size_t written; \
492 ch = get32 (inptr); \
494 if (var == iso2022jp2) \
496 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ \
497 if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7))) \
499 ch &= 0x7f; \
500 if (ch >= 'A' && ch <= 'Z') \
501 ch += 'a' - 'A'; \
502 if (ch == 0x01) \
503 tag = TAG_language; \
504 else if (ch == 'j' && tag == TAG_language) \
505 tag = TAG_language_j; \
506 else if (ch == 'a' && tag == TAG_language_j) \
507 tag = TAG_language_ja; \
508 else if (ch == 'k' && tag == TAG_language) \
509 tag = TAG_language_k; \
510 else if (ch == 'o' && tag == TAG_language_k) \
511 tag = TAG_language_ko; \
512 else if (ch == 'z' && tag == TAG_language) \
513 tag = TAG_language_z; \
514 else if (ch == 'h' && tag == TAG_language_z) \
515 tag = TAG_language_zh; \
516 else if (ch == 0x7f) \
517 tag = TAG_none; \
518 else \
520 /* Other tag characters reset the tag parsing state (if the \
521 current state is a temporary state) or are ignored (if \
522 the current state is a stable one). */ \
523 if (tag >= TAG_language) \
524 tag = TAG_none; \
527 inptr += 4; \
528 continue; \
531 /* Non-tag characters reset the tag parsing state, if the current \
532 state is a temporary state. */ \
533 if (__glibc_unlikely (tag >= TAG_language)) \
534 tag = TAG_none; \
537 /* First see whether we can write the character using the currently \
538 selected character set. But ignore the selected character set if \
539 the current language tag shows different preferences. */ \
540 if (set == ASCII_set) \
542 /* Please note that the NUL byte is *not* matched if we are not \
543 currently using the ASCII charset. This is because we must \
544 switch to the initial state whenever a NUL byte is written. */ \
545 if (ch <= 0x7f) \
547 *outptr++ = ch; \
548 written = 1; \
550 /* At the beginning of a line, G2 designation is cleared. */ \
551 if (var == iso2022jp2 && ch == 0x0a) \
552 set2 = UNSPECIFIED_set; \
554 else \
555 written = __UNKNOWN_10646_CHAR; \
557 /* ISO-2022-JP recommends to encode the newline character always in \
558 ASCII since this allows a context-free interpretation of the \
559 characters at the beginning of the next line. Otherwise it would \
560 have to be known whether the last line ended using ASCII or \
561 JIS X 0201. */ \
562 else if (set == JISX0201_Roman_set \
563 && (__builtin_expect (tag == TAG_none, 1) \
564 || tag == TAG_language_ja)) \
566 unsigned char buf[1]; \
567 written = ucs4_to_jisx0201 (ch, buf); \
568 if (written != __UNKNOWN_10646_CHAR) \
570 if (buf[0] > 0x20 && buf[0] < 0x80) \
572 *outptr++ = buf[0]; \
573 written = 1; \
575 else \
576 written = __UNKNOWN_10646_CHAR; \
579 else if (set == JISX0201_Kana_set \
580 && (__builtin_expect (tag == TAG_none, 1) \
581 || tag == TAG_language_ja)) \
583 unsigned char buf[1]; \
584 written = ucs4_to_jisx0201 (ch, buf); \
585 if (written != __UNKNOWN_10646_CHAR) \
587 if (buf[0] > 0xa0 && buf[0] < 0xe0) \
589 *outptr++ = buf[0] - 0x80; \
590 written = 1; \
592 else \
593 written = __UNKNOWN_10646_CHAR; \
596 else \
598 if ((set == JISX0208_1978_set || set == JISX0208_1983_set) \
599 && (__builtin_expect (tag == TAG_none, 1) \
600 || tag == TAG_language_ja)) \
601 written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
602 else if (set == JISX0212_set \
603 && (__builtin_expect (tag == TAG_none, 1) \
604 || tag == TAG_language_ja)) \
605 written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
606 else if (set == GB2312_set \
607 && (__builtin_expect (tag == TAG_none, 1) \
608 || tag == TAG_language_zh)) \
609 written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
610 else if (set == KSC5601_set \
611 && (__builtin_expect (tag == TAG_none, 1) \
612 || tag == TAG_language_ko)) \
613 written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
614 else \
615 written = __UNKNOWN_10646_CHAR; \
617 if (__glibc_unlikely (written == 0)) \
619 result = __GCONV_FULL_OUTPUT; \
620 break; \
622 else if (written != __UNKNOWN_10646_CHAR) \
623 outptr += written; \
626 if (written == __UNKNOWN_10646_CHAR \
627 && __builtin_expect (tag == TAG_none, 1)) \
629 if (set2 == ISO88591_set) \
631 if (ch >= 0x80 && ch <= 0xff) \
633 if (__glibc_unlikely (outptr + 3 > outend)) \
635 result = __GCONV_FULL_OUTPUT; \
636 break; \
639 *outptr++ = ESC; \
640 *outptr++ = 'N'; \
641 *outptr++ = ch & 0x7f; \
642 written = 3; \
645 else if (set2 == ISO88597_set) \
647 if (__glibc_likely (ch < 0xffff)) \
649 const struct gap *rp = from_idx; \
651 while (ch > rp->end) \
652 ++rp; \
653 if (ch >= rp->start) \
655 unsigned char res = \
656 iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
657 if (res != '\0') \
659 if (__glibc_unlikely (outptr + 3 > outend)) \
661 result = __GCONV_FULL_OUTPUT; \
662 break; \
665 *outptr++ = ESC; \
666 *outptr++ = 'N'; \
667 *outptr++ = res & 0x7f; \
668 written = 3; \
675 if (written == __UNKNOWN_10646_CHAR) \
677 /* The attempts to use the currently selected character set \
678 failed, either because the language tag changed, or because \
679 the character requires a different character set, or because \
680 the character is unknown. \
681 The CJK character sets partially overlap when seen as subsets \
682 of ISO 10646; therefore there is no single correct result. \
683 We use a preference order which depends on the language tag. */ \
685 if (ch <= 0x7f) \
687 /* We must encode using ASCII. First write out the \
688 escape sequence. */ \
689 if (__glibc_unlikely (outptr + 3 > outend)) \
691 result = __GCONV_FULL_OUTPUT; \
692 break; \
695 *outptr++ = ESC; \
696 *outptr++ = '('; \
697 *outptr++ = 'B'; \
698 set = ASCII_set; \
700 if (__glibc_unlikely (outptr + 1 > outend)) \
702 result = __GCONV_FULL_OUTPUT; \
703 break; \
705 *outptr++ = ch; \
707 /* At the beginning of a line, G2 designation is cleared. */ \
708 if (var == iso2022jp2 && ch == 0x0a) \
709 set2 = UNSPECIFIED_set; \
711 else \
713 /* Now it becomes difficult. We must search the other \
714 character sets one by one. Use an ordered conversion \
715 list that depends on the current language tag. */ \
716 cvlist_t conversion_list; \
717 unsigned char buf[2]; \
718 int res = __GCONV_ILLEGAL_INPUT; \
720 if (var == iso2022jp2) \
721 conversion_list = conversion_lists[tag >> 8]; \
722 else \
723 conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
725 do \
726 switch (CVLIST_FIRST (conversion_list)) \
728 case european: \
730 /* Try ISO 8859-1 upper half. */ \
731 if (ch >= 0x80 && ch <= 0xff) \
733 if (set2 != ISO88591_set) \
735 if (__builtin_expect (outptr + 3 > outend, 0)) \
737 res = __GCONV_FULL_OUTPUT; \
738 break; \
740 *outptr++ = ESC; \
741 *outptr++ = '.'; \
742 *outptr++ = 'A'; \
743 set2 = ISO88591_set; \
746 if (__glibc_unlikely (outptr + 3 > outend)) \
748 res = __GCONV_FULL_OUTPUT; \
749 break; \
751 *outptr++ = ESC; \
752 *outptr++ = 'N'; \
753 *outptr++ = ch - 0x80; \
754 res = __GCONV_OK; \
755 break; \
758 /* Try ISO 8859-7 upper half. */ \
759 if (__glibc_likely (ch < 0xffff)) \
761 const struct gap *rp = from_idx; \
763 while (ch > rp->end) \
764 ++rp; \
765 if (ch >= rp->start) \
767 unsigned char ch2 = \
768 iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
769 if (ch2 != '\0') \
771 if (set2 != ISO88597_set) \
773 if (__builtin_expect (outptr + 3 > outend, \
774 0)) \
776 res = __GCONV_FULL_OUTPUT; \
777 break; \
779 *outptr++ = ESC; \
780 *outptr++ = '.'; \
781 *outptr++ = 'F'; \
782 set2 = ISO88597_set; \
785 if (__builtin_expect (outptr + 3 > outend, 0)) \
787 res = __GCONV_FULL_OUTPUT; \
788 break; \
790 *outptr++ = ESC; \
791 *outptr++ = 'N'; \
792 *outptr++ = ch2 - 0x80; \
793 res = __GCONV_OK; \
794 break; \
799 break; \
801 case japanese: \
803 /* Try JIS X 0201 Roman. */ \
804 written = ucs4_to_jisx0201 (ch, buf); \
805 if (written != __UNKNOWN_10646_CHAR \
806 && buf[0] > 0x20 && buf[0] < 0x80) \
808 if (set != JISX0201_Roman_set) \
810 if (__builtin_expect (outptr + 3 > outend, 0)) \
812 res = __GCONV_FULL_OUTPUT; \
813 break; \
815 *outptr++ = ESC; \
816 *outptr++ = '('; \
817 *outptr++ = 'J'; \
818 set = JISX0201_Roman_set; \
821 if (__glibc_unlikely (outptr + 1 > outend)) \
823 res = __GCONV_FULL_OUTPUT; \
824 break; \
826 *outptr++ = buf[0]; \
827 res = __GCONV_OK; \
828 break; \
831 /* Try JIS X 0208. */ \
832 written = ucs4_to_jisx0208 (ch, buf, 2); \
833 if (written != __UNKNOWN_10646_CHAR) \
835 if (set != JISX0208_1983_set) \
837 if (__builtin_expect (outptr + 3 > outend, 0)) \
839 res = __GCONV_FULL_OUTPUT; \
840 break; \
842 *outptr++ = ESC; \
843 *outptr++ = '$'; \
844 *outptr++ = 'B'; \
845 set = JISX0208_1983_set; \
848 if (__glibc_unlikely (outptr + 2 > outend)) \
850 res = __GCONV_FULL_OUTPUT; \
851 break; \
853 *outptr++ = buf[0]; \
854 *outptr++ = buf[1]; \
855 res = __GCONV_OK; \
856 break; \
859 if (__glibc_unlikely (var == iso2022jp)) \
860 /* Don't use the other Japanese character sets. */ \
861 break; \
863 /* Try JIS X 0212. */ \
864 written = ucs4_to_jisx0212 (ch, buf, 2); \
865 if (written != __UNKNOWN_10646_CHAR) \
867 if (set != JISX0212_set) \
869 if (__builtin_expect (outptr + 4 > outend, 0)) \
871 res = __GCONV_FULL_OUTPUT; \
872 break; \
874 *outptr++ = ESC; \
875 *outptr++ = '$'; \
876 *outptr++ = '('; \
877 *outptr++ = 'D'; \
878 set = JISX0212_set; \
881 if (__glibc_unlikely (outptr + 2 > outend)) \
883 res = __GCONV_FULL_OUTPUT; \
884 break; \
886 *outptr++ = buf[0]; \
887 *outptr++ = buf[1]; \
888 res = __GCONV_OK; \
889 break; \
892 break; \
894 case chinese: \
895 assert (var == iso2022jp2); \
897 /* Try GB 2312. */ \
898 written = ucs4_to_gb2312 (ch, buf, 2); \
899 if (written != __UNKNOWN_10646_CHAR) \
901 if (set != GB2312_set) \
903 if (__builtin_expect (outptr + 3 > outend, 0)) \
905 res = __GCONV_FULL_OUTPUT; \
906 break; \
908 *outptr++ = ESC; \
909 *outptr++ = '$'; \
910 *outptr++ = 'A'; \
911 set = GB2312_set; \
914 if (__glibc_unlikely (outptr + 2 > outend)) \
916 res = __GCONV_FULL_OUTPUT; \
917 break; \
919 *outptr++ = buf[0]; \
920 *outptr++ = buf[1]; \
921 res = __GCONV_OK; \
922 break; \
925 break; \
927 case korean: \
928 assert (var == iso2022jp2); \
930 /* Try KSC 5601. */ \
931 written = ucs4_to_ksc5601 (ch, buf, 2); \
932 if (written != __UNKNOWN_10646_CHAR) \
934 if (set != KSC5601_set) \
936 if (__builtin_expect (outptr + 4 > outend, 0)) \
938 res = __GCONV_FULL_OUTPUT; \
939 break; \
941 *outptr++ = ESC; \
942 *outptr++ = '$'; \
943 *outptr++ = '('; \
944 *outptr++ = 'C'; \
945 set = KSC5601_set; \
948 if (__glibc_unlikely (outptr + 2 > outend)) \
950 res = __GCONV_FULL_OUTPUT; \
951 break; \
953 *outptr++ = buf[0]; \
954 *outptr++ = buf[1]; \
955 res = __GCONV_OK; \
956 break; \
959 break; \
961 case other: \
962 assert (var == iso2022jp2); \
964 /* Try JIS X 0201 Kana. This is not officially part \
965 of ISO-2022-JP-2, according to RFC 1554. Therefore \
966 we try this only after all other attempts. */ \
967 written = ucs4_to_jisx0201 (ch, buf); \
968 if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
970 if (set != JISX0201_Kana_set) \
972 if (__builtin_expect (outptr + 3 > outend, 0)) \
974 res = __GCONV_FULL_OUTPUT; \
975 break; \
977 *outptr++ = ESC; \
978 *outptr++ = '('; \
979 *outptr++ = 'I'; \
980 set = JISX0201_Kana_set; \
983 if (__glibc_unlikely (outptr + 1 > outend)) \
985 res = __GCONV_FULL_OUTPUT; \
986 break; \
988 *outptr++ = buf[0] - 0x80; \
989 res = __GCONV_OK; \
990 break; \
993 break; \
995 default: \
996 abort (); \
998 while (res == __GCONV_ILLEGAL_INPUT \
999 && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
1001 if (res == __GCONV_FULL_OUTPUT) \
1003 result = res; \
1004 break; \
1007 if (res == __GCONV_ILLEGAL_INPUT) \
1009 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1014 /* Now that we wrote the output increment the input pointer. */ \
1015 inptr += 4; \
1017 #define LOOP_NEED_FLAGS
1018 #define EXTRA_LOOP_DECLS , enum variant var, int *setp
1019 #define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
1020 int set2 = *setp & CURRENT_ASSIGN_MASK; \
1021 int tag = *setp & CURRENT_TAG_MASK;
1022 #define REINIT_PARAMS do \
1024 set = *setp & CURRENT_SEL_MASK; \
1025 set2 = *setp & CURRENT_ASSIGN_MASK; \
1026 tag = *setp & CURRENT_TAG_MASK; \
1028 while (0)
1029 #define UPDATE_PARAMS *setp = set | set2 | tag
1030 #include <iconv/loop.c>
1033 /* Now define the toplevel functions. */
1034 #include <iconv/skeleton.c>