Include <string.h>.
[glibc/pb-stable.git] / iconvdata / iso-2022-jp.c
blobaf840dd0a79692a65e41c3263ae04ca05fbc234d
1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <dlfcn.h>
22 #include <gconv.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include "jis0201.h"
27 #include "jis0208.h"
28 #include "jis0212.h"
29 #include "gb2312.h"
30 #include "ksc5601.h"
/* One contiguous range of UCS4 code points in a sparse conversion table.
   The lookup code in this file walks an array of these records until it
   finds the first one whose END is not below the character, checks the
   character against START, and then adds IDX to the table index.  */
struct gap
{
  uint16_t start;	/* First code point of the range.  */
  uint16_t end;		/* Last code point of the range.  */
  int32_t idx;		/* Offset added to the code point when indexing.  */
};
39 #include "iso8859-7jp.h"
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b

/* We provide our own initialization and destructor function.  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Definitions used in the body of the `gconv' function.  The FROM values
   describe the ISO-2022-JP side (1 to 4 bytes per step), the TO values
   the 4-byte side that is read with get32 and written with put32.  */
#define FROM_LOOP from_iso2022jp_loop
#define TO_LOOP to_iso2022jp_loop
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
#define MAX_NEEDED_TO 4
#define FROM_DIRECTION (dir == from_iso2022jp)
/* Locals set up before the conversion loops run: the direction and
   variant stored by gconv_init, a slot for saving the state, and a
   pointer to the state word that holds the current character set.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
/* Extra arguments handed to both loop functions; they must match
   EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS , var, setp
/* Direction of the transformation.  illegal_dir is the value used before
   gconv_init has recognized one of the supported charset names.  */
enum direction
{
  illegal_dir,
  to_iso2022jp,
  from_iso2022jp
};
/* We handle ISO-2022-JP and ISO-2022-JP-2 here.  illegal_var is the value
   used before gconv_init has recognized one of the charset names.  */
enum variant
{
  illegal_var,
  iso2022jp,
  iso2022jp2
};
81 struct iso2022jp_data
83 enum direction dir;
84 enum variant var;
/* The COUNT element of the state keeps track of the currently selected
   character set.  The values occupy bits 3..5 of the state word; the
   possible values are:  */
enum
{
  ASCII_set = 0,
  JISX0208_1978_set = 1 << 3,
  JISX0208_1983_set = 2 << 3,
  JISX0201_Roman_set = 3 << 3,
  JISX0201_Kana_set = 4 << 3,
  GB2312_set = 5 << 3,
  KSC5601_set = 6 << 3,
  JISX0212_set = 7 << 3,
  CURRENT_SEL_MASK = 7 << 3
};
/* The second value stored is the designation of the G2 set.  It occupies
   bits 6..7 of the state word.  The following values are possible:  */
enum
{
  UNSPECIFIED_set = 0,
  ISO88591_set = 1 << 6,
  ISO88597_set = 2 << 6,
  CURRENT_ASSIGN_MASK = 3 << 6
};
/* The third value, only used during conversion from Unicode to
   ISO-2022-JP-2, describes the language tag parsing status.  It occupies
   bits 8..10 of the state word.  The possible values are as follows.
   Values >= TAG_language are temporary tag parsing states; the stable
   values 0..3 (after shifting right by 8) index conversion_lists.  */
enum
{
  TAG_none = 0,
  TAG_language = 4 << 8,
  TAG_language_j = 5 << 8,
  TAG_language_ja = 1 << 8,
  TAG_language_k = 6 << 8,
  TAG_language_ko = 2 << 8,
  TAG_language_z = 7 << 8,
  TAG_language_zh = 3 << 8,
  CURRENT_TAG_MASK = 7 << 8
};
130 extern int gconv_init (struct __gconv_step *step);
132 gconv_init (struct __gconv_step *step)
134 /* Determine which direction. */
135 struct iso2022jp_data *new_data;
136 enum direction dir = illegal_dir;
137 enum variant var = illegal_var;
138 int result;
140 if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
142 dir = from_iso2022jp;
143 var = iso2022jp;
145 else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
147 dir = to_iso2022jp;
148 var = iso2022jp;
150 else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
152 dir = from_iso2022jp;
153 var = iso2022jp2;
155 else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
157 dir = to_iso2022jp;
158 var = iso2022jp2;
161 result = __GCONV_NOCONV;
162 if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
164 new_data
165 = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
167 result = __GCONV_NOMEM;
168 if (new_data != NULL)
170 new_data->dir = dir;
171 new_data->var = var;
172 step->__data = new_data;
174 if (dir == from_iso2022jp)
176 step->__min_needed_from = MIN_NEEDED_FROM;
177 step->__max_needed_from = MAX_NEEDED_FROM;
178 step->__min_needed_to = MIN_NEEDED_TO;
179 step->__max_needed_to = MAX_NEEDED_TO;
181 else
183 step->__min_needed_from = MIN_NEEDED_TO;
184 step->__max_needed_from = MAX_NEEDED_TO;
185 step->__min_needed_to = MIN_NEEDED_FROM;
186 step->__max_needed_to = MAX_NEEDED_FROM + 2;
189 /* Yes, this is a stateful encoding. */
190 step->__stateful = 1;
192 result = __GCONV_OK;
196 return result;
extern void gconv_end (struct __gconv_step *data);

/* Destructor for a conversion step: release the private data that
   gconv_init stored in DATA->__data.  */
void
gconv_end (struct __gconv_step *data)
{
  free (data->__data);
}
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The low three bits of the state word do not belong to this module and
   are preserved; every bit above them (selected set, G2 designation,
   language tag) is cleared.  When converting to ISO-2022-JP with a
   non-ASCII set selected, the sequence `Esc ( B' is written first; if
   fewer than three bytes are free, STATUS is set to __GCONV_FULL_OUTPUT
   and the state is left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      if (dir == from_iso2022jp \
	  || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
	{ \
	  /* It's easy, we don't have to emit anything, we just reset the \
	     state for the input.  Note that this also clears the G2 \
	     designation.  */ \
	  data->__statep->__count &= 7; \
	  data->__statep->__count |= ASCII_set; \
	} \
      else \
	{ \
	  /* We are not in the initial state.  To switch back we have \
	     to emit the sequence `Esc ( B'.  */ \
	  if (__builtin_expect (outbuf + 3 > outend, 0)) \
	    /* We don't have enough room in the output buffer.  */ \
	    status = __GCONV_FULL_OUTPUT; \
	  else \
	    { \
	      /* Write out the shift sequence.  */ \
	      *outbuf++ = ESC; \
	      *outbuf++ = '('; \
	      *outbuf++ = 'B'; \
	      /* Keep only the low three bits.  Masking with ~7 here would \
		 leave the selected set and the G2 designation in place, \
		 so the recorded state would no longer match the shift \
		 sequence just written.  */ \
	      data->__statep->__count &= 7; \
	      data->__statep->__count |= ASCII_set; \
	    } \
	} \
    }
/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A nonzero SAVE copies the state word into
   save_set; zero writes save_set back into the state word.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_set = *setp; \
  else \
    *setp = save_set
/* First define the conversion function from ISO-2022-JP to UCS4.

   One pass of BODY either consumes an escape sequence, which only updates
   SET (the selected character set) or SET2 (the G2 designation), or it
   converts one character and stores it as four bytes with put32.  Illegal
   input stops the loop with __GCONV_ILLEGAL_INPUT unless errors are being
   ignored, in which case the input is skipped and *irreversible is
   incremented.  */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Recognize escape sequences.  */ \
    if (__builtin_expect (ch, 0) == ESC) \
      { \
	/* We now must be prepared to read two to three more \
	   characters.  If we have a match in the first character but \
	   then the input buffer ends we terminate with an error since \
	   we must not risk missing an escape sequence just because it \
	   is not entirely in the current input buffer.  */ \
	if (__builtin_expect (inptr + 2 >= inend, 0) \
	    || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
		&& __builtin_expect (inptr + 3 >= inend, 0))) \
	  { \
	    /* Not enough input available.  */ \
	    result = __GCONV_INCOMPLETE_INPUT; \
	    break; \
	  } \
	\
	if (inptr[1] == '(') \
	  { \
	    if (inptr[2] == 'B') \
	      { \
		/* ASCII selected.  */ \
		set = ASCII_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'J') \
	      { \
		/* JIS X 0201 Roman selected.  */ \
		set = JISX0201_Roman_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (var == iso2022jp2 && inptr[2] == 'I') \
	      { \
		/* JIS X 0201 Kana selected.  */ \
		set = JISX0201_Kana_set; \
		inptr += 3; \
		continue; \
	      } \
	  } \
	else if (inptr[1] == '$') \
	  { \
	    if (inptr[2] == '@') \
	      { \
		/* JIS X 0208-1978 selected.  */ \
		set = JISX0208_1978_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'B') \
	      { \
		/* JIS X 0208-1983 selected.  */ \
		set = JISX0208_1983_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (var == iso2022jp2) \
	      { \
		if (inptr[2] == 'A') \
		  { \
		    /* GB 2312-1980 selected.  */ \
		    set = GB2312_set; \
		    inptr += 3; \
		    continue; \
		  } \
		else if (inptr[2] == '(') \
		  { \
		    if (inptr[3] == 'C') \
		      { \
			/* KSC 5601-1987 selected.  */ \
			set = KSC5601_set; \
			inptr += 4; \
			continue; \
		      } \
		    else if (inptr[3] == 'D') \
		      { \
			/* JIS X 0212-1990 selected.  */ \
			set = JISX0212_set; \
			inptr += 4; \
			continue; \
		      } \
		  } \
	      } \
	  } \
	else if (var == iso2022jp2 && inptr[1] == '.') \
	  { \
	    if (inptr[2] == 'A') \
	      { \
		/* ISO 8859-1-GR selected.  */ \
		set2 = ISO88591_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'F') \
	      { \
		/* ISO 8859-7-GR selected.  */ \
		set2 = ISO88597_set; \
		inptr += 3; \
		continue; \
	      } \
	  } \
      } \
    \
    /* `Esc N' is a single shift: the following byte is taken from the \
       character set currently designated as G2.  */ \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
      { \
	if (set2 == ISO88591_set) \
	  { \
	    ch = inptr[2] | 0x80; \
	    inptr += 3; \
	  } \
	else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
	  { \
	    /* We use the table from the ISO 8859-7 module.  */ \
	    if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
	      { \
		if (! ignore_errors_p ()) \
		  { \
		    result = __GCONV_ILLEGAL_INPUT; \
		    break; \
		  } \
		\
		++inptr; \
		++*irreversible; \
		continue; \
	      } \
	    ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
	    if (ch == 0) \
	      { \
		if (! ignore_errors_p ()) \
		  { \
		    result = __GCONV_ILLEGAL_INPUT; \
		    break; \
		  } \
		\
		inptr += 3; \
		++*irreversible; \
		continue; \
	      } \
	    inptr += 3; \
	  } \
	else \
	  { \
	    /* Single shift without a G2 designation.  */ \
	    if (! ignore_errors_p ()) \
	      { \
		result = __GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    \
	    ++inptr; \
	    ++*irreversible; \
	    continue; \
	  } \
      } \
    else if (ch >= 0x80) \
      { \
	/* Bytes with the high bit set are not accepted here.  */ \
	if (! ignore_errors_p ()) \
	  { \
	    result = __GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
	\
	++inptr; \
	++*irreversible; \
	continue; \
      } \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
      /* Almost done, just advance the input pointer.  */ \
      ++inptr; \
    else if (set == JISX0201_Roman_set) \
      { \
	/* Use the JIS X 0201 table.  */ \
	ch = jisx0201_to_ucs4 (ch); \
	if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR) \
	  { \
	    if (! ignore_errors_p ()) \
	      { \
		result = __GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    \
	    ++inptr; \
	    ++*irreversible; \
	    continue; \
	  } \
	++inptr; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
	/* Use the JIS X 0201 table.  */ \
	ch = jisx0201_to_ucs4 (ch + 0x80); \
	if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR) \
	  { \
	    if (! ignore_errors_p ()) \
	      { \
		result = __GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    \
	    ++inptr; \
	    ++*irreversible; \
	    continue; \
	  } \
	++inptr; \
      } \
    else \
      { \
	if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
	  /* XXX I don't have the tables for these two old variants of \
	     JIS X 0208.  Therefore I'm using the tables for JIS X \
	     0208-1990.  If somebody has problems with this please \
	     provide the appropriate tables.  */ \
	  ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
	else if (set == JISX0212_set) \
	  /* Use the JIS X 0212 table.  */ \
	  ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
	else if (set == GB2312_set) \
	  /* Use the GB 2312 table.  */ \
	  ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
	else \
	  { \
	    assert (set == KSC5601_set); \
	    \
	    /* Use the KSC 5601 table.  */ \
	    ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
	  } \
	\
	if (__builtin_expect (ch, 1) == 0) \
	  { \
	    result = __GCONV_INCOMPLETE_INPUT; \
	    break; \
	  } \
	else if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR) \
	  { \
	    if (! ignore_errors_p ()) \
	      { \
		result = __GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    \
	    ++inptr; \
	    ++*irreversible; \
	    continue; \
	  } \
      } \
    \
    put32 (outptr, ch); \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
		    int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS *setp = set | set2
519 #include <iconv/loop.c>
/* Next, define the other direction.  */

/* The groups of character sets that are tried when a character cannot be
   written with the currently selected set.  `none' must stay zero because
   a zero conversion list means that nothing is left to try.  */
enum conversion { none = 0, european, japanese, chinese, korean, other };

/* A datatype for conversion lists.  Up to five `enum conversion' values
   are packed three bits each, the first one in the lowest bits.  */
typedef unsigned int cvlist_t;
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
#define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
#define CVLIST_REST(cvl) ((cvl) >> 3)

/* Preference order per language tag, indexed by the stable tag value
   shifted right by eight bits.  */
static const cvlist_t conversion_lists[4] =
{
  /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
  /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
  /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
  /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
};
/* The conversion function from UCS4 to ISO-2022-JP.

   One pass of BODY reads a four-byte character with get32.  In the
   ISO-2022-JP-2 variant, Unicode tag characters only update TAG and
   produce no output.  Any other character is first tried in the currently
   selected set, then through the G2 single shift, and finally in the
   other character sets in a language-dependent order, writing the
   necessary escape sequences.  SET, SET2 and TAG are stored back into the
   state word by UPDATE_PARAMS.  */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT (MAX_NEEDED_FROM + 2)
#define LOOPFCT TO_LOOP
#define BODY \
  { \
    uint32_t ch; \
    size_t written; \
    \
    ch = get32 (inptr); \
    \
    if (var == iso2022jp2) \
      { \
	/* Handle Unicode tag characters (range U+E0000..U+E007F).  */ \
	if (__builtin_expect ((ch >> 7) == (0xe0000 >> 7), 0)) \
	  { \
	    ch &= 0x7f; \
	    if (ch >= 'A' && ch <= 'Z') \
	      ch += 'a' - 'A'; \
	    if (ch == 0x01) \
	      tag = TAG_language; \
	    else if (ch == 'j' && tag == TAG_language) \
	      tag = TAG_language_j; \
	    else if (ch == 'a' && tag == TAG_language_j) \
	      tag = TAG_language_ja; \
	    else if (ch == 'k' && tag == TAG_language) \
	      tag = TAG_language_k; \
	    else if (ch == 'o' && tag == TAG_language_k) \
	      tag = TAG_language_ko; \
	    else if (ch == 'z' && tag == TAG_language) \
	      tag = TAG_language_z; \
	    else if (ch == 'h' && tag == TAG_language_z) \
	      tag = TAG_language_zh; \
	    else if (ch == 0x7f) \
	      tag = TAG_none; \
	    else \
	      { \
		/* Other tag characters reset the tag parsing state (if the \
		   current state is a temporary state) or are ignored (if \
		   the current state is a stable one).  */ \
		if (tag >= TAG_language) \
		  tag = TAG_none; \
	      } \
	    \
	    inptr += 4; \
	    continue; \
	  } \
	\
	/* Non-tag characters reset the tag parsing state, if the current \
	   state is a temporary state.  */ \
	if (__builtin_expect (tag >= TAG_language, 0)) \
	  tag = TAG_none; \
      } \
    \
    /* First see whether we can write the character using the currently \
       selected character set.  But ignore the selected character set if \
       the current language tag shows different preferences.  */ \
    if (set == ASCII_set) \
      { \
	/* Please note that the NUL byte is *not* matched if we are not \
	   currently using the ASCII charset.  This is because we must \
	   switch to the initial state whenever a NUL byte is written.  */ \
	if (ch <= 0x7f) \
	  { \
	    *outptr++ = ch; \
	    written = 1; \
	    \
	    /* At the beginning of a line, G2 designation is cleared.  */ \
	    if (var == iso2022jp2 && ch == 0x0a) \
	      set2 = UNSPECIFIED_set; \
	  } \
	else \
	  written = __UNKNOWN_10646_CHAR; \
      } \
    /* ISO-2022-JP recommends to encode the newline character always in \
       ASCII since this allows a context-free interpretation of the \
       characters at the beginning of the next line.  Otherwise it would \
       have to be known whether the last line ended using ASCII or \
       JIS X 0201.  */ \
    else if (set == JISX0201_Roman_set \
	     && (__builtin_expect (tag == TAG_none, 1) \
		 || tag == TAG_language_ja)) \
      { \
	unsigned char buf[1]; \
	written = ucs4_to_jisx0201 (ch, buf); \
	if (written != __UNKNOWN_10646_CHAR) \
	  { \
	    if (buf[0] > 0x20 && buf[0] < 0x80) \
	      { \
		*outptr++ = buf[0]; \
		written = 1; \
	      } \
	    else \
	      written = __UNKNOWN_10646_CHAR; \
	  } \
      } \
    else if (set == JISX0201_Kana_set \
	     && (__builtin_expect (tag == TAG_none, 1) \
		 || tag == TAG_language_ja)) \
      { \
	unsigned char buf[1]; \
	written = ucs4_to_jisx0201 (ch, buf); \
	if (written != __UNKNOWN_10646_CHAR) \
	  { \
	    if (buf[0] > 0xa0 && buf[0] < 0xe0) \
	      { \
		*outptr++ = buf[0] - 0x80; \
		written = 1; \
	      } \
	    else \
	      written = __UNKNOWN_10646_CHAR; \
	  } \
      } \
    else \
      { \
	if ((set == JISX0208_1978_set || set == JISX0208_1983_set) \
	    && (__builtin_expect (tag == TAG_none, 1) \
		|| tag == TAG_language_ja)) \
	  written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
	else if (set == JISX0212_set \
		 && (__builtin_expect (tag == TAG_none, 1) \
		     || tag == TAG_language_ja)) \
	  written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
	else if (set == GB2312_set \
		 && (__builtin_expect (tag == TAG_none, 1) \
		     || tag == TAG_language_zh)) \
	  written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
	else if (set == KSC5601_set \
		 && (__builtin_expect (tag == TAG_none, 1) \
		     || tag == TAG_language_ko)) \
	  written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
	else \
	  written = __UNKNOWN_10646_CHAR; \
	\
	if (__builtin_expect (written == 0, 0)) \
	  { \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	else if (written != __UNKNOWN_10646_CHAR) \
	  outptr += written; \
      } \
    \
    /* Next try the single shift into the currently designated G2 set.  */ \
    if (written == __UNKNOWN_10646_CHAR \
	&& __builtin_expect (tag == TAG_none, 1)) \
      { \
	if (set2 == ISO88591_set) \
	  { \
	    if (ch >= 0x80 && ch <= 0xff) \
	      { \
		if (__builtin_expect (outptr + 3 > outend, 0)) \
		  { \
		    result = __GCONV_FULL_OUTPUT; \
		    break; \
		  } \
		\
		*outptr++ = ESC; \
		*outptr++ = 'N'; \
		*outptr++ = ch & 0x7f; \
		written = 3; \
	      } \
	  } \
	else if (set2 == ISO88597_set) \
	  { \
	    /* The gap table has 16-bit bounds, so the search below only \
	       stops inside the table for characters under 0xffff (this \
	       assumes the last entry ends at 0xffff).  Larger values \
	       cannot be in ISO 8859-7 anyway; skip the lookup for them.  */ \
	    if (__builtin_expect (ch < 0xffff, 1)) \
	      { \
		const struct gap *rp = from_idx; \
		\
		while (ch > rp->end) \
		  ++rp; \
		if (ch >= rp->start) \
		  { \
		    unsigned char res = \
		      iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
		    if (res != '\0') \
		      { \
			if (__builtin_expect (outptr + 3 > outend, 0)) \
			  { \
			    result = __GCONV_FULL_OUTPUT; \
			    break; \
			  } \
			\
			*outptr++ = ESC; \
			*outptr++ = 'N'; \
			*outptr++ = res; \
			written = 3; \
		      } \
		  } \
	      } \
	  } \
      } \
    \
    if (written == __UNKNOWN_10646_CHAR) \
      { \
	/* The attempts to use the currently selected character set \
	   failed, either because the language tag changed, or because \
	   the character requires a different character set, or because \
	   the character is unknown. \
	   The CJK character sets partially overlap when seen as subsets \
	   of ISO 10646; therefore there is no single correct result. \
	   We use a preference order which depends on the language tag.  */ \
	\
	if (ch <= 0x7f) \
	  { \
	    /* We must encode using ASCII.  First write out the \
	       escape sequence.  */ \
	    if (__builtin_expect (outptr + 3 > outend, 0)) \
	      { \
		result = __GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    \
	    *outptr++ = ESC; \
	    *outptr++ = '('; \
	    *outptr++ = 'B'; \
	    set = ASCII_set; \
	    \
	    if (__builtin_expect (outptr + 1 > outend, 0)) \
	      { \
		result = __GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    *outptr++ = ch; \
	    \
	    /* At the beginning of a line, G2 designation is cleared.  */ \
	    if (var == iso2022jp2 && ch == 0x0a) \
	      set2 = UNSPECIFIED_set; \
	  } \
	else \
	  { \
	    /* Now it becomes difficult.  We must search the other \
	       character sets one by one.  Use an ordered conversion \
	       list that depends on the current language tag.  */ \
	    cvlist_t conversion_list; \
	    unsigned char buf[2]; \
	    \
	    result = __GCONV_ILLEGAL_INPUT; \
	    \
	    if (var == iso2022jp2) \
	      conversion_list = conversion_lists[tag >> 8]; \
	    else \
	      conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
	    \
	    /* Every `break' inside the switch only leaves the switch; the \
	       do-while condition then decides whether to try the next \
	       entry of the list.  */ \
	    do \
	      switch (CVLIST_FIRST (conversion_list)) \
		{ \
		case european: \
		  \
		  /* Try ISO 8859-1 upper half.  */ \
		  if (ch >= 0x80 && ch <= 0xff) \
		    { \
		      if (set2 != ISO88591_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      result = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '.'; \
			  *outptr++ = 'A'; \
			  set2 = ISO88591_set; \
			} \
		      \
		      if (__builtin_expect (outptr + 3 > outend, 0)) \
			{ \
			  result = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = ESC; \
		      *outptr++ = 'N'; \
		      *outptr++ = ch - 0x80; \
		      result = __GCONV_OK; \
		      break; \
		    } \
		  \
		  /* Try ISO 8859-7 upper half.  Characters from 0xffff \
		     upwards are skipped so that the gap table search \
		     stays inside the table (this assumes the last entry \
		     ends at 0xffff).  */ \
		  if (__builtin_expect (ch < 0xffff, 1)) \
		    { \
		      const struct gap *rp = from_idx; \
		      \
		      while (ch > rp->end) \
			++rp; \
		      if (ch >= rp->start) \
			{ \
			  unsigned char res = \
			    iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
			  if (res != '\0') \
			    { \
			      if (set2 != ISO88597_set) \
				{ \
				  if (__builtin_expect (outptr + 3 > outend, 0)) \
				    { \
				      result = __GCONV_FULL_OUTPUT; \
				      break; \
				    } \
				  *outptr++ = ESC; \
				  *outptr++ = '.'; \
				  *outptr++ = 'F'; \
				  set2 = ISO88597_set; \
				} \
			      \
			      if (__builtin_expect (outptr + 3 > outend, 0)) \
				{ \
				  result = __GCONV_FULL_OUTPUT; \
				  break; \
				} \
			      *outptr++ = ESC; \
			      *outptr++ = 'N'; \
			      *outptr++ = res; \
			      result = __GCONV_OK; \
			      break; \
			    } \
			} \
		    } \
		  \
		  break; \
		  \
		case japanese: \
		  \
		  /* Try JIS X 0201 Roman.  */ \
		  written = ucs4_to_jisx0201 (ch, buf); \
		  if (written != __UNKNOWN_10646_CHAR \
		      && buf[0] > 0x20 && buf[0] < 0x80) \
		    { \
		      if (set != JISX0201_Roman_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      result = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '('; \
			  *outptr++ = 'J'; \
			  set = JISX0201_Roman_set; \
			} \
		      \
		      if (__builtin_expect (outptr + 1 > outend, 0)) \
			{ \
			  result = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      result = __GCONV_OK; \
		      break; \
		    } \
		  \
		  /* Try JIS X 0208.  */ \
		  written = ucs4_to_jisx0208 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != JISX0208_1983_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      result = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = 'B'; \
			  set = JISX0208_1983_set; \
			} \
		      \
		      if (__builtin_expect (outptr + 2 > outend, 0)) \
			{ \
			  result = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      result = __GCONV_OK; \
		      break; \
		    } \
		  \
		  if (__builtin_expect (var == iso2022jp, 0)) \
		    /* Don't use the other Japanese character sets.  */ \
		    break; \
		  \
		  /* Try JIS X 0212.  */ \
		  written = ucs4_to_jisx0212 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != JISX0212_set) \
			{ \
			  if (__builtin_expect (outptr + 4 > outend, 0)) \
			    { \
			      result = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = '('; \
			  *outptr++ = 'D'; \
			  set = JISX0212_set; \
			} \
		      \
		      if (__builtin_expect (outptr + 2 > outend, 0)) \
			{ \
			  result = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      result = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		case chinese: \
		  assert (var == iso2022jp2); \
		  \
		  /* Try GB 2312.  */ \
		  written = ucs4_to_gb2312 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != GB2312_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      result = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = 'A'; \
			  set = GB2312_set; \
			} \
		      \
		      if (__builtin_expect (outptr + 2 > outend, 0)) \
			{ \
			  result = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      result = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		case korean: \
		  assert (var == iso2022jp2); \
		  \
		  /* Try KSC 5601.  */ \
		  written = ucs4_to_ksc5601 (ch, buf, 2); \
		  if (written != __UNKNOWN_10646_CHAR) \
		    { \
		      if (set != KSC5601_set) \
			{ \
			  if (__builtin_expect (outptr + 4 > outend, 0)) \
			    { \
			      result = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '$'; \
			  *outptr++ = '('; \
			  *outptr++ = 'C'; \
			  set = KSC5601_set; \
			} \
		      \
		      if (__builtin_expect (outptr + 2 > outend, 0)) \
			{ \
			  result = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0]; \
		      *outptr++ = buf[1]; \
		      result = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		case other: \
		  assert (var == iso2022jp2); \
		  \
		  /* Try JIS X 0201 Kana.  This is not officially part \
		     of ISO-2022-JP-2, according to RFC 1554.  Therefore \
		     we try this only after all other attempts.  */ \
		  written = ucs4_to_jisx0201 (ch, buf); \
		  if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
		    { \
		      if (set != JISX0201_Kana_set) \
			{ \
			  if (__builtin_expect (outptr + 3 > outend, 0)) \
			    { \
			      result = __GCONV_FULL_OUTPUT; \
			      break; \
			    } \
			  *outptr++ = ESC; \
			  *outptr++ = '('; \
			  *outptr++ = 'I'; \
			  set = JISX0201_Kana_set; \
			} \
		      \
		      if (__builtin_expect (outptr + 1 > outend, 0)) \
			{ \
			  result = __GCONV_FULL_OUTPUT; \
			  break; \
			} \
		      *outptr++ = buf[0] - 0x80; \
		      result = __GCONV_OK; \
		      break; \
		    } \
		  \
		  break; \
		  \
		default: \
		  abort (); \
		} \
	    while (result == __GCONV_ILLEGAL_INPUT \
		   && (conversion_list = CVLIST_REST (conversion_list)) != 0); \
	    \
	    if (result == __GCONV_FULL_OUTPUT) \
	      break; \
	    \
	    if (result == __GCONV_ILLEGAL_INPUT) \
	      { \
		STANDARD_ERR_HANDLER (4); \
	      } \
	  } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
		    int set2 = *setp & CURRENT_ASSIGN_MASK; \
		    int tag = *setp & CURRENT_TAG_MASK;
#define UPDATE_PARAMS *setp = set | set2 | tag
1072 #include <iconv/loop.c>
1075 /* Now define the toplevel functions. */
1076 #include <iconv/skeleton.c>