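/* Retrieved from glibc.git, path iconvdata/iso-2022-jp.c,
   blob 2a32736542a7580f5f6debde9c4d1244b2fd5837.
   The page header also carried the commit title "AArch64: Optimize memcmp",
   which does not describe the contents of this file.  */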
/* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
   Copyright (C) 1998-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
#include <assert.h>
#include <dlfcn.h>
#include <gconv.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "jis0201.h"
#include "jis0208.h"
#include "jis0212.h"
#include "gb2312.h"
#include "ksc5601.h"
/* One contiguous range of UCS4 code points, used when mapping UCS4 back
   to ISO 8859-7: a character CH inside [START, END] is looked up at
   position CH - 0xa0 + IDX of the reverse table.  */
struct gap
{
  uint16_t start;	/* First code point covered by the range.  */
  uint16_t end;		/* Last code point covered by the range.  */
  int32_t idx;		/* Index adjustment for the reverse table.  */
};
38 #include "iso8859-7jp.h"
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b

/* We provide our own initialization and destructor function.  */
#define DEFINE_INIT	0
#define DEFINE_FINI	0

/* Definitions used in the body of the `gconv' function.  The *_NEEDED_*
   values are byte counts: the ISO-2022-JP side needs between one and four
   input bytes (an escape sequence has up to four) and up to six output
   bytes (a four-byte escape sequence plus a two-byte character), while
   the other side always handles four-byte units.  */
#define FROM_LOOP			from_iso2022jp_loop
#define TO_LOOP				to_iso2022jp_loop
#define ONE_DIRECTION			0
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	4
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		4
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		6
#define FROM_DIRECTION			(dir == from_iso2022jp)

/* Locals set up before the conversion loops run: the direction and
   variant recorded by gconv_init, a slot for saving the state, and a
   pointer to the state word itself.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
#define EXTRA_LOOP_ARGS		, var, setp
/* Direction of the transformation.  */
enum direction
{
  illegal_dir,
  to_iso2022jp,
  from_iso2022jp
};

/* We handle ISO-2022-jp and ISO-2022-JP-2 here.  */
enum variant
{
  illegal_var,
  iso2022jp,
  iso2022jp2
};

/* Per-step data allocated by gconv_init and released by gconv_end.  */
struct iso2022jp_data
{
  enum direction dir;	/* Which way this step converts.  */
  enum variant var;	/* Which of the two encodings is handled.  */
};
/* The COUNT element of the state keeps track of the currently selected
   character set.  The selection occupies bits 3..5 of the word; the
   possible values are:  */
enum
{
  ASCII_set = 0,
  JISX0208_1978_set = 1 << 3,
  JISX0208_1983_set = 2 << 3,
  JISX0201_Roman_set = 3 << 3,
  JISX0201_Kana_set = 4 << 3,
  GB2312_set = 5 << 3,
  KSC5601_set = 6 << 3,
  JISX0212_set = 7 << 3,
  CURRENT_SEL_MASK = 7 << 3
};
/* The second value stored is the designation of the G2 set.  It occupies
   bits 6..7 of the state word.  The following values are possible:  */
enum
{
  UNSPECIFIED_set = 0,
  ISO88591_set = 1 << 6,
  ISO88597_set = 2 << 6,
  CURRENT_ASSIGN_MASK = 3 << 6
};
/* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
   describes the language tag parsing status.  It occupies bits 8..10 of
   the state word.  The possible values are as follows.  Values
   >= TAG_language are temporary tag parsing states; the stable states
   (none, ja, ko, zh) are numbered 0..3 so that `tag >> 8' can index a
   four-element table.  */
enum
{
  TAG_none = 0,
  TAG_language = 4 << 8,
  TAG_language_j = 5 << 8,
  TAG_language_ja = 1 << 8,
  TAG_language_k = 6 << 8,
  TAG_language_ko = 2 << 8,
  TAG_language_z = 7 << 8,
  TAG_language_zh = 3 << 8,
  CURRENT_TAG_MASK = 7 << 8
};
134 extern int gconv_init (struct __gconv_step *step);
136 gconv_init (struct __gconv_step *step)
138 /* Determine which direction. */
139 struct iso2022jp_data *new_data;
140 enum direction dir = illegal_dir;
141 enum variant var = illegal_var;
142 int result;
144 if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
146 dir = from_iso2022jp;
147 var = iso2022jp;
149 else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
151 dir = to_iso2022jp;
152 var = iso2022jp;
154 else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
156 dir = from_iso2022jp;
157 var = iso2022jp2;
159 else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
161 dir = to_iso2022jp;
162 var = iso2022jp2;
165 result = __GCONV_NOCONV;
166 if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
168 new_data
169 = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
171 result = __GCONV_NOMEM;
172 if (new_data != NULL)
174 new_data->dir = dir;
175 new_data->var = var;
176 step->__data = new_data;
178 if (dir == from_iso2022jp)
180 step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
181 step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
182 step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
183 step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
185 else
187 step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
188 step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
189 step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
190 step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
193 /* Yes, this is a stateful encoding. */
194 step->__stateful = 1;
196 result = __GCONV_OK;
200 return result;
204 extern void gconv_end (struct __gconv_step *data);
205 void
206 gconv_end (struct __gconv_step *data)
208 free (data->__data);
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The low three bits of the state word are preserved; everything above
   them (selected set, G2 designation, language tag) is cleared.  When
   converting to ISO-2022-JP with a non-ASCII set selected, the three-byte
   sequence `Esc ( B' is written first; if it does not fit, STATUS is set
   to __GCONV_FULL_OUTPUT and the state is left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      if (dir == from_iso2022jp \
	  || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
	{ \
	  /* It's easy, we don't have to emit anything, we just reset the \
	     state for the input.  Note that this also clears the G2 \
	     designation.  */ \
	  data->__statep->__count &= 7; \
	  data->__statep->__count |= ASCII_set; \
	} \
      else \
	{ \
	  /* We are not in the initial state.  To switch back we have \
	     to emit the sequence `Esc ( B'.  */ \
	  if (__glibc_unlikely (outbuf + 3 > outend)) \
	    /* We don't have enough room in the output buffer.  */ \
	    status = __GCONV_FULL_OUTPUT; \
	  else \
	    { \
	      /* Write out the shift sequence.  */ \
	      *outbuf++ = ESC; \
	      *outbuf++ = '('; \
	      *outbuf++ = 'B'; \
	      /* Note that this also clears the G2 designation.  */ \
	      data->__statep->__count &= 7; \
	      data->__statep->__count |= ASCII_set; \
	    } \
	} \
    }
/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A nonzero SAVE copies the state word into
   `save_set'; zero writes the saved value back.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_set = *setp; \
  else \
    *setp = save_set
/* First define the conversion function from ISO-2022-JP to UCS4.

   BODY is the per-character step handed to the <iconv/loop.c> template.
   Each pass either consumes one designation escape sequence (updating
   SET or SET2 and restarting) or decodes one character according to the
   selected set and stores it as a four-byte value at OUTPTR.  SET and
   SET2 are loaded from *SETP by INIT_PARAMS and written back by
   UPDATE_PARAMS.  */
#define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Recognize escape sequences.  */ \
    if (__builtin_expect (ch, 0) == ESC) \
      { \
	/* We now must be prepared to read two to three more \
	   characters.  If we have a match in the first character but \
	   then the input buffer ends we terminate with an error since \
	   we must not risk missing an escape sequence just because it \
	   is not entirely in the current input buffer.  */ \
	if (__builtin_expect (inptr + 2 >= inend, 0) \
	    || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
		&& __builtin_expect (inptr + 3 >= inend, 0))) \
	  { \
	    /* Not enough input available.  */ \
	    result = __GCONV_INCOMPLETE_INPUT; \
	    break; \
	  } \
	\
	if (inptr[1] == '(') \
	  { \
	    if (inptr[2] == 'B') \
	      { \
		/* ASCII selected.  */ \
		set = ASCII_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'J') \
	      { \
		/* JIS X 0201 Roman selected.  */ \
		set = JISX0201_Roman_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (var == iso2022jp2 && inptr[2] == 'I') \
	      { \
		/* JIS X 0201 Kana selected.  */ \
		set = JISX0201_Kana_set; \
		inptr += 3; \
		continue; \
	      } \
	  } \
	else if (inptr[1] == '$') \
	  { \
	    if (inptr[2] == '@') \
	      { \
		/* JIS X 0208-1978 selected.  */ \
		set = JISX0208_1978_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'B') \
	      { \
		/* JIS X 0208-1983 selected.  */ \
		set = JISX0208_1983_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (var == iso2022jp2) \
	      { \
		if (inptr[2] == 'A') \
		  { \
		    /* GB 2312-1980 selected.  */ \
		    set = GB2312_set; \
		    inptr += 3; \
		    continue; \
		  } \
		else if (inptr[2] == '(') \
		  { \
		    if (inptr[3] == 'C') \
		      { \
			/* KSC 5601-1987 selected.  */ \
			set = KSC5601_set; \
			inptr += 4; \
			continue; \
		      } \
		    else if (inptr[3] == 'D') \
		      { \
			/* JIS X 0212-1990 selected.  */ \
			set = JISX0212_set; \
			inptr += 4; \
			continue; \
		      } \
		  } \
	      } \
	  } \
	else if (var == iso2022jp2 && inptr[1] == '.') \
	  { \
	    if (inptr[2] == 'A') \
	      { \
		/* ISO 8859-1-GR selected.  */ \
		set2 = ISO88591_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'F') \
	      { \
		/* ISO 8859-7-GR selected.  */ \
		set2 = ISO88597_set; \
		inptr += 3; \
		continue; \
	      } \
	  } \
      } \
    \
    /* `Esc N' is a single shift: the following byte is taken from the \
       G2 set designated in SET2.  */ \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
      { \
	if (set2 == ISO88591_set) \
	  { \
	    ch = inptr[2] | 0x80; \
	    inptr += 3; \
	  } \
	else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
	  { \
	    /* We use the table from the ISO 8859-7 module.  */ \
	    if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
	      STANDARD_FROM_LOOP_ERR_HANDLER (1); \
	    ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
	    if (ch == 0) \
	      STANDARD_FROM_LOOP_ERR_HANDLER (3); \
	    inptr += 3; \
	  } \
	else \
	  { \
	    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
	  } \
      } \
    else if (ch >= 0x80) \
      { \
	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
      /* Almost done, just advance the input pointer.  */ \
      ++inptr; \
    else if (set == JISX0201_Roman_set) \
      { \
	/* Use the JIS X 0201 table.  */ \
	ch = jisx0201_to_ucs4 (ch); \
	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
	  STANDARD_FROM_LOOP_ERR_HANDLER (1); \
	++inptr; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
	/* Use the JIS X 0201 table.  */ \
	ch = jisx0201_to_ucs4 (ch + 0x80); \
	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
	  STANDARD_FROM_LOOP_ERR_HANDLER (1); \
	++inptr; \
      } \
    else \
      { \
	if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
	  /* XXX I don't have the tables for these two old variants of \
	     JIS X 0208.  Therefore I'm using the tables for JIS X \
	     0208-1990.  If somebody has problems with this please \
	     provide the appropriate tables.  */ \
	  ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
	else if (set == JISX0212_set) \
	  /* Use the JIS X 0212 table.  */ \
	  ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
	else if (set == GB2312_set) \
	  /* Use the GB 2312 table.  */ \
	  ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
	else \
	  { \
	    assert (set == KSC5601_set); \
	    \
	    /* Use the KSC 5601 table.  */ \
	    ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
	  } \
	\
	if (__glibc_unlikely (ch == 0)) \
	  { \
	    result = __GCONV_INCOMPLETE_INPUT; \
	    break; \
	  } \
	else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
	  { \
	    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
	  } \
      } \
    \
    put32 (outptr, ch); \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, enum variant var, int *setp
#define INIT_PARAMS		int set = *setp & CURRENT_SEL_MASK; \
				int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS		*setp = set | set2
460 #include <iconv/loop.c>
/* Next, define the other direction.  */

/* Families of character sets that can be tried for one character.
   `none' is zero so that an exhausted conversion list compares equal
   to 0.  */
enum conversion { none = 0, european, japanese, chinese, korean, other };

/* A datatype for conversion lists.  Up to five `enum conversion' values
   are packed three bits apiece, the first entry in the lowest bits.  */
typedef unsigned int cvlist_t;
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
#define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
#define CVLIST_REST(cvl) ((cvl) >> 3)

/* Preference order per stable language tag, indexed by `tag >> 8'.  */
static const cvlist_t conversion_lists[4] =
  {
    /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
    /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
  };
/* Definition of the conversion from UCS4 to ISO-2022-JP / ISO-2022-JP-2.

   BODY is the per-character step handed to the <iconv/loop.c> template.
   For each four-byte input character it
     1. consumes Unicode tag characters (ISO-2022-JP-2 only), updating TAG;
     2. tries the currently selected set SET, unless the language tag
	asks for a different one;
     3. tries the designated G2 set SET2 through the `Esc N' single shift;
     4. otherwise emits a designation escape sequence for the first set of
	the tag-dependent conversion list that can represent the character.
   SET, SET2 and TAG are loaded from *SETP by INIT_PARAMS and written back
   by UPDATE_PARAMS.  */
#define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT			TO_LOOP
#define BODY \
  { \
    uint32_t ch; \
    size_t written; \
    \
    ch = get32 (inptr); \
    \
    if (var == iso2022jp2) \
      { \
	/* Handle Unicode tag characters (range U+E0000..U+E007F).  */ \
	if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7))) \
	  { \
	    ch &= 0x7f; \
	    if (ch >= 'A' && ch <= 'Z') \
	      ch += 'a' - 'A'; \
	    if (ch == 0x01) \
	      tag = TAG_language; \
	    else if (ch == 'j' && tag == TAG_language) \
	      tag = TAG_language_j; \
	    else if (ch == 'a' && tag == TAG_language_j) \
	      tag = TAG_language_ja; \
	    else if (ch == 'k' && tag == TAG_language) \
	      tag = TAG_language_k; \
	    else if (ch == 'o' && tag == TAG_language_k) \
	      tag = TAG_language_ko; \
	    else if (ch == 'z' && tag == TAG_language) \
	      tag = TAG_language_z; \
	    else if (ch == 'h' && tag == TAG_language_z) \
	      tag = TAG_language_zh; \
	    else if (ch == 0x7f) \
	      tag = TAG_none; \
	    else \
	      { \
		/* Other tag characters reset the tag parsing state (if the \
		   current state is a temporary state) or are ignored (if \
		   the current state is a stable one).  */ \
		if (tag >= TAG_language) \
		  tag = TAG_none; \
	      } \
	    \
	    inptr += 4; \
	    continue; \
	  } \
	\
	/* Non-tag characters reset the tag parsing state, if the current \
	   state is a temporary state.  */ \
	if (__glibc_unlikely (tag >= TAG_language)) \
	  tag = TAG_none; \
      } \
    \
    /* First see whether we can write the character using the currently \
       selected character set.  But ignore the selected character set if \
       the current language tag shows different preferences.  */ \
    if (set == ASCII_set) \
      { \
	/* Please note that the NUL byte is *not* matched if we are not \
	   currently using the ASCII charset.  This is because we must \
	   switch to the initial state whenever a NUL byte is written.  */ \
	if (ch <= 0x7f) \
	  { \
	    *outptr++ = ch; \
	    written = 1; \
	    \
	    /* At the beginning of a line, G2 designation is cleared.  */ \
	    if (var == iso2022jp2 && ch == 0x0a) \
	      set2 = UNSPECIFIED_set; \
	  } \
	else \
	  written = __UNKNOWN_10646_CHAR; \
      } \
    /* ISO-2022-JP recommends to encode the newline character always in \
       ASCII since this allows a context-free interpretation of the \
       characters at the beginning of the next line.  Otherwise it would \
       have to be known whether the last line ended using ASCII or \
       JIS X 0201.  */ \
    else if (set == JISX0201_Roman_set \
	     && (__builtin_expect (tag == TAG_none, 1) \
		 || tag == TAG_language_ja)) \
      { \
	unsigned char buf[1]; \
	written = ucs4_to_jisx0201 (ch, buf); \
	if (written != __UNKNOWN_10646_CHAR) \
	  { \
	    if (buf[0] > 0x20 && buf[0] < 0x80) \
	      { \
		*outptr++ = buf[0]; \
		written = 1; \
	      } \
	    else \
	      written = __UNKNOWN_10646_CHAR; \
	  } \
      } \
    else if (set == JISX0201_Kana_set \
	     && (__builtin_expect (tag == TAG_none, 1) \
		 || tag == TAG_language_ja)) \
      { \
	unsigned char buf[1]; \
	written = ucs4_to_jisx0201 (ch, buf); \
	if (written != __UNKNOWN_10646_CHAR) \
	  { \
	    if (buf[0] > 0xa0 && buf[0] < 0xe0) \
	      { \
		*outptr++ = buf[0] - 0x80; \
		written = 1; \
	      } \
	    else \
	      written = __UNKNOWN_10646_CHAR; \
	  } \
      } \
    else \
      { \
	if ((set == JISX0208_1978_set || set == JISX0208_1983_set) \
	    && (__builtin_expect (tag == TAG_none, 1) \
		|| tag == TAG_language_ja)) \
	  written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
	else if (set == JISX0212_set \
		 && (__builtin_expect (tag == TAG_none, 1) \
		     || tag == TAG_language_ja)) \
	  written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
	else if (set == GB2312_set \
		 && (__builtin_expect (tag == TAG_none, 1) \
		     || tag == TAG_language_zh)) \
	  written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
	else if (set == KSC5601_set \
		 && (__builtin_expect (tag == TAG_none, 1) \
		     || tag == TAG_language_ko)) \
	  written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
	else \
	  written = __UNKNOWN_10646_CHAR; \
	\
	if (__glibc_unlikely (written == 0)) \
	  { \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	else if (written != __UNKNOWN_10646_CHAR) \
	  outptr += written; \
      } \
    \
    /* Next try the designated G2 set through the `Esc N' single shift.  */ \
    if (written == __UNKNOWN_10646_CHAR \
	&& __builtin_expect (tag == TAG_none, 1)) \
      { \
	if (set2 == ISO88591_set) \
	  { \
	    if (ch >= 0x80 && ch <= 0xff) \
	      { \
		if (__glibc_unlikely (outptr + 3 > outend)) \
		  { \
		    result = __GCONV_FULL_OUTPUT; \
		    break; \
		  } \
		\
		*outptr++ = ESC; \
		*outptr++ = 'N'; \
		*outptr++ = ch & 0x7f; \
		written = 3; \
	      } \
	  } \
	else if (set2 == ISO88597_set) \
	  { \
	    if (__glibc_likely (ch < 0xffff)) \
	      { \
		const struct gap *rp = from_idx; \
		\
		while (ch > rp->end) \
		  ++rp; \
		if (ch >= rp->start) \
		  { \
		    unsigned char res = \
		      iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
		    if (res != '\0') \
		      { \
			if (__glibc_unlikely (outptr + 3 > outend)) \
			  { \
			    result = __GCONV_FULL_OUTPUT; \
			    break; \
			  } \
			\
			*outptr++ = ESC; \
			*outptr++ = 'N'; \
			*outptr++ = res & 0x7f; \
			written = 3; \
		      } \
		  } \
	      } \
	  } \
      } \
    \
    if (written == __UNKNOWN_10646_CHAR) \
      { \
	/* The attempts to use the currently selected character set \
	   failed, either because the language tag changed, or because \
	   the character requires a different character set, or because \
	   the character is unknown. \
	   The CJK character sets partially overlap when seen as subsets \
	   of ISO 10646; therefore there is no single correct result. \
	   We use a preference order which depends on the language tag.  */ \
	\
	if (ch <= 0x7f) \
	  { \
	    /* We must encode using ASCII.  First write out the \
	       escape sequence.  */ \
	    if (__glibc_unlikely (outptr + 3 > outend)) \
	      { \
		result = __GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    \
	    *outptr++ = ESC; \
	    *outptr++ = '('; \
	    *outptr++ = 'B'; \
	    set = ASCII_set; \
	    \
	    if (__glibc_unlikely (outptr + 1 > outend)) \
	      { \
		result = __GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    *outptr++ = ch; \
	    \
	    /* At the beginning of a line, G2 designation is cleared.  */ \
	    if (var == iso2022jp2 && ch == 0x0a) \
	      set2 = UNSPECIFIED_set; \
	  } \
	else \
	  { \
	    /* Now it becomes difficult.  We must search the other \
	       character sets one by one.  Use an ordered conversion \
	       list that depends on the current language tag.  */ \
	    cvlist_t conversion_list; \
	    unsigned char buf[2]; \
	    int res = __GCONV_ILLEGAL_INPUT; \
	    \
	    if (var == iso2022jp2) \
	      conversion_list = conversion_lists[tag >> 8]; \
	    else \
	      conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
	    \
	    /* Every `break' below leaves the switch only; the loop \
	       condition then decides from RES whether to go on.  */ \
	    do \
	      switch (CVLIST_FIRST (conversion_list)) \
		{ \
		case european: \
		  \
		  /* Try ISO 8859-1 upper half.   */ \
		  if (ch >= 0x80 && ch <= 0xff) \
		    { \
		      if (set2 != ISO88591_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      res = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '.'; \
			  *outptr++ = 'A'; \
			  set2 = ISO88591_set; \
			} \
		      \
		      if (__glibc_unlikely (outptr + 3 > outend)) \
			{ \
			  res = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = ESC; \
		      *outptr++ = 'N'; \
		      *outptr++ = ch - 0x80; \
		      res = __GCONV_OK; \
		      break; \
		    } \
		  \
		  /* Try ISO 8859-7 upper half.  */ \
		  if (__glibc_likely (ch < 0xffff)) \
		    { \
		      const struct gap *rp = from_idx; \
		      \
		      while (ch > rp->end) \
			++rp; \
		      if (ch >= rp->start) \
			{ \
			  unsigned char ch2 = \
			    iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
			  if (ch2 != '\0') \
			    { \
			      if (set2 != ISO88597_set) \
				{ \
				  if (__builtin_expect (outptr + 3 > outend, \
							0)) \
				    { \
				      res = __GCONV_FULL_OUTPUT; \
				      break; \
				    } \
				  *outptr++ = ESC; \
				  *outptr++ = '.'; \
				  *outptr++ = 'F'; \
				  set2 = ISO88597_set; \
				} \
			      \
			      if (__builtin_expect (outptr + 3 > outend, 0)) \
				{ \
				  res = __GCONV_FULL_OUTPUT; \
				  break; \
				} \
			      *outptr++ = ESC; \
			      *outptr++ = 'N'; \
			      *outptr++ = ch2 - 0x80; \
			      res = __GCONV_OK; \
			      break; \
			    } \
			} \
		    } \
		  \
		  break; \
		  \
		case japanese: \
		  \
		  /* Try JIS X 0201 Roman.  */ \
		  written = ucs4_to_jisx0201 (ch, buf); \
		  if (written != __UNKNOWN_10646_CHAR \
		      && buf[0] > 0x20 && buf[0] < 0x80) \
		    { \
		      if (set != JISX0201_Roman_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      res = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '('; \
			  *outptr++ = 'J'; \
			  set = JISX0201_Roman_set; \
			} \
		      \
		      if (__glibc_unlikely (outptr + 1 > outend)) \
			{ \
			  res = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      res = __GCONV_OK; \
		      break; \
		    } \
		  \
		  /* Try JIS X 0208.  */ \
		  written = ucs4_to_jisx0208 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != JISX0208_1983_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      res = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = 'B'; \
			  set = JISX0208_1983_set; \
			} \
		      \
		      if (__glibc_unlikely (outptr + 2 > outend)) \
			{ \
			  res = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      res = __GCONV_OK; \
		      break; \
		    } \
		  \
		  if (__glibc_unlikely (var == iso2022jp)) \
		    /* Don't use the other Japanese character sets.  */ \
		    break; \
		  \
		  /* Try JIS X 0212.  */ \
		  written = ucs4_to_jisx0212 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != JISX0212_set) \
			{ \
			  if (__builtin_expect (outptr + 4 > outend, 0)) \
			    { \
			      res = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = '('; \
			  *outptr++ = 'D'; \
			  set = JISX0212_set; \
			} \
		      \
		      if (__glibc_unlikely (outptr + 2 > outend)) \
			{ \
			  res = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      res = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		case chinese: \
		  assert (var == iso2022jp2); \
		  \
		  /* Try GB 2312.  */ \
		  written = ucs4_to_gb2312 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != GB2312_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      res = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = 'A'; \
			  set = GB2312_set; \
			} \
		      \
		      if (__glibc_unlikely (outptr + 2 > outend)) \
			{ \
			  res = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      res = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		case korean: \
		  assert (var == iso2022jp2); \
		  \
		  /* Try KSC 5601.  */ \
		  written = ucs4_to_ksc5601 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != KSC5601_set) \
			{ \
			  if (__builtin_expect (outptr + 4 > outend, 0)) \
			    { \
			      res = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = '('; \
			  *outptr++ = 'C'; \
			  set = KSC5601_set; \
			} \
		      \
		      if (__glibc_unlikely (outptr + 2 > outend)) \
			{ \
			  res = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      res = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		case other: \
		  assert (var == iso2022jp2); \
		  \
		  /* Try JIS X 0201 Kana.  This is not officially part \
		     of ISO-2022-JP-2, according to RFC 1554.  Therefore \
		     we try this only after all other attempts.  */ \
		  written = ucs4_to_jisx0201 (ch, buf); \
		  if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
		    { \
		      if (set != JISX0201_Kana_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      res = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '('; \
			  *outptr++ = 'I'; \
			  set = JISX0201_Kana_set; \
			} \
		      \
		      if (__glibc_unlikely (outptr + 1 > outend)) \
			{ \
			  res = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0] - 0x80; \
		      res = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		default: \
		  abort (); \
		} \
	    while (res == __GCONV_ILLEGAL_INPUT \
		   && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
	    \
	    if (res == __GCONV_FULL_OUTPUT) \
	      { \
		result = res; \
		break; \
	      } \
	    \
	    if (res == __GCONV_ILLEGAL_INPUT) \
	      { \
		STANDARD_TO_LOOP_ERR_HANDLER (4); \
	      } \
	  } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, enum variant var, int *setp
#define INIT_PARAMS		int set = *setp & CURRENT_SEL_MASK; \
				int set2 = *setp & CURRENT_ASSIGN_MASK; \
				int tag = *setp & CURRENT_TAG_MASK;
#define REINIT_PARAMS		do \
				  { \
				    set = *setp & CURRENT_SEL_MASK; \
				    set2 = *setp & CURRENT_ASSIGN_MASK; \
				    tag = *setp & CURRENT_TAG_MASK; \
				  } \
				while (0)
#define UPDATE_PARAMS		*setp = set | set2 | tag
1029 #include <iconv/loop.c>
1032 /* Now define the toplevel functions. */
1033 #include <iconv/skeleton.c>