1 /* Conversion to and from the various ISO 646 CCS.
2 Copyright (C) 1998-2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
20 /* The implementation of the conversions which can be performed by this
21 module is not very sophisticated and not tuned at all. There are
22 zillions of ISO 646 derivatives and supporting each of them in a separate
23 module is overkill since these coded character sets are hardly ever
24 used anymore (except ANSI_X3.4-1968 == ASCII, which is compatible
25 with ISO 8859-1). The European variants are superseded by the
26 various ISO 8859-? standards and the Asian variants are embedded in
27 larger character sets. Therefore this implementation is simply
28 here to make it possible to do the conversion if it is necessary.
29 The cost in the gconv-modules file is set to `2' and therefore
30 allows one to easily provide a tuned implementation in case this
31 proves to be necessary. */
39 /* Definitions used in the body of the `gconv' function. */
/* Names given to the two conversion loops; each is later selected through
   LOOPFCT before <iconv/loop.c> is included. */
40 #define FROM_LOOP from_ascii
41 #define TO_LOOP to_ascii
/* Per-character sizes for the two sides of the conversion.  gconv_init
   stores these values in the step's __min_needed_from/__max_needed_from and
   __min_needed_to/__max_needed_to fields, swapped for the reverse
   direction.  */
44 #define MIN_NEEDED_FROM 1
45 #define MIN_NEEDED_TO 4
/* NOTE(review): presumably consumed by <iconv/skeleton.c> to indicate that
   both directions are provided -- confirm against the skeleton.  */
46 #define ONE_DIRECTION 0
/* True when the direction recorded for this step is from_iso646.  */
48 #define FROM_DIRECTION (dir == from_iso646)
/* Declares the locals `dir' and `var', both read from the
   struct iso646_data that step->__data points to.  */
49 #define PREPARE_LOOP \
50 enum direction dir = ((struct iso646_data *) step->__data)->dir; \
51 enum variant var = ((struct iso646_data *) step->__data)->var;
/* Extra argument appended to the loop calls; it pairs with the
   `, enum variant var' declared through EXTRA_LOOP_DECLS.  */
52 #define EXTRA_LOOP_ARGS , var
55 /* Direction of the transformation. */
63 /* See names below, must be in the same order. */
67 CA
, /* CSA_Z243.4-1985-1 */
68 CA2
, /* CSA_Z243.4-1985-2 */
75 JP
, /* JIS_C6220-1969-RO */
76 JP_OCR_B
, /* JIS_C6229-1984-B */
77 YU
, /* JUS_I.B1.002 */
82 FR1
, /* NF_Z_62-010_(1973) */
87 SE
, /* SEN_850200_B */
88 SE2
/* SEN_850200_C */
91 /* Must be in the same order as enum variant above. */
92 static const char names
[] =
94 "CSA_Z243.4-1985-1//\0"
95 "CSA_Z243.4-1985-2//\0"
102 "JIS_C6220-1969-RO//\0"
103 "JIS_C6229-1984-B//\0"
109 "NF_Z_62-010_1973//\0" /* Note that we don't have the parenthesis in
126 extern int gconv_init (struct __gconv_step
*step
);
128 gconv_init (struct __gconv_step
*step
)
130 /* Determine which direction. */
131 struct iso646_data
*new_data
;
132 enum direction dir
= illegal_dir
;
135 enum variant var
= 0;
136 for (const char *name
= names
; *name
!= '\0';
137 name
= __rawmemchr (name
, '\0') + 1)
139 if (__strcasecmp (step
->__from_name
, name
) == 0)
144 else if (__strcasecmp (step
->__to_name
, name
) == 0)
152 result
= __GCONV_NOCONV
;
153 if (__builtin_expect (dir
, from_iso646
) != illegal_dir
)
155 new_data
= (struct iso646_data
*) malloc (sizeof (struct iso646_data
));
157 result
= __GCONV_NOMEM
;
158 if (new_data
!= NULL
)
162 step
->__data
= new_data
;
164 if (dir
== from_iso646
)
166 step
->__min_needed_from
= MIN_NEEDED_FROM
;
167 step
->__max_needed_from
= MIN_NEEDED_FROM
;
168 step
->__min_needed_to
= MIN_NEEDED_TO
;
169 step
->__max_needed_to
= MIN_NEEDED_TO
;
173 step
->__min_needed_from
= MIN_NEEDED_TO
;
174 step
->__max_needed_from
= MIN_NEEDED_TO
;
175 step
->__min_needed_to
= MIN_NEEDED_FROM
;
176 step
->__max_needed_to
= MIN_NEEDED_FROM
;
179 step
->__stateful
= 0;
189 extern void gconv_end (struct __gconv_step
*data
);
191 gconv_end (struct __gconv_step
*data
)
197 /* First define the conversion function from the ISO 646 variant to UCS4. */
198 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
199 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
200 #define LOOPFCT FROM_LOOP
204 int failure = __GCONV_OK; \
210 if (var == GB || var == ES || var == IT || var == FR || var == FR1) \
212 else if (var == NO2) \
218 else if (var == HU || var == CU || var == SE || var == SE2) \
222 if (var == CA || var == CA2 || var == FR || var == FR1) \
224 else if (var == DE || var == ES || var == IT || var == PT) \
226 else if (var == ES2) \
228 else if (var == YU) \
230 else if (var == HU) \
232 else if (var == PT2) \
234 else if (var == SE2) \
238 if (var == CA || var == CA2) \
240 else if (var == DE || var == SE || var == SE2) \
242 else if (var == DK || var == NO || var == NO2) \
244 else if (var == ES || var == ES2 || var == CU) \
246 else if (var == IT || var == FR || var == FR1) \
248 else if (var == JP_OCR_B) \
250 else if (var == YU) \
252 else if (var == HU) \
254 else if (var == PT || var == PT2) \
258 if (var == CA || var == CA2 || var == IT || var == FR || var == FR1) \
260 else if (var == DE || var == HU || var == SE || var == SE2) \
262 else if (var == DK || var == NO || var == NO2) \
264 else if (var == ES || var == ES2 || var == CU) \
266 else if (var == JP || var == JP_OCR_B) \
268 else if (var == YU) \
270 else if (var == KR) \
272 else if (var == PT || var == PT2) \
276 if (var == CA || var == CA2) \
278 else if (var == DE || var == HU) \
280 else if (var == DK || var == NO || var == NO2 || var == SE \
283 else if (var == ES) \
285 else if (var == ES2) \
287 else if (var == IT) \
289 else if (var == JP_OCR_B) \
291 else if (var == YU) \
293 else if (var == FR || var == FR1) \
295 else if (var == PT || var == PT2) \
301 else if (var == CA2) \
303 else if (var == ES2 || var == CU) \
305 else if (var == YU) \
307 else if (var == SE2) \
311 if (var == CA || var == CA2) \
313 else if (var == IT) \
315 else if (var == JP_OCR_B) \
316 /* Illegal character. */ \
317 failure = __GCONV_ILLEGAL_INPUT; \
318 else if (var == YU) \
320 else if (var == HU) \
322 else if (var == FR) \
324 else if (var == SE2) \
328 if (var == CA || var == CA2 || var == HU || var == FR || var == FR1) \
330 else if (var == DE || var == SE || var == SE2) \
332 else if (var == DK || var == NO || var == NO2) \
334 else if (var == ES) \
336 else if (var == ES2 || var == CU) \
338 else if (var == IT) \
340 else if (var == YU) \
342 else if (var == PT || var == PT2) \
346 if (var == CA || var == CA2 || var == FR || var == FR1) \
348 else if (var == DE || var == HU || var == SE || var == SE2) \
350 else if (var == DK || var == NO || var == NO2) \
352 else if (var == ES || var == ES2 || var == CU) \
354 else if (var == IT) \
356 else if (var == YU) \
358 else if (var == PT || var == PT2) \
362 if (var == CA || var == CA2 || var == IT || var == FR || var == FR1) \
364 else if (var == DE || var == HU) \
366 else if (var == DK || var == NO || var == NO2 || var == SE \
369 else if (var == ES || var == ES2) \
371 else if (var == YU) \
373 else if (var == CU) \
375 else if (var == PT || var == PT2) \
379 if (var == GB || var == CN || var == JP || var == NO || var == SE) \
381 else if (var == CA || var == CA2) \
383 else if (var == DE) \
385 else if (var == ES2 || var == CU || var == FR || var == FR1) \
387 else if (var == IT) \
389 else if (var == JP_OCR_B) \
390 /* Illegal character. */ \
391 failure = __GCONV_ILLEGAL_INPUT; \
392 else if (var == YU) \
394 else if (var == HU) \
396 else if (var == NO2) \
398 else if (var == PT) \
400 else if (var == SE2) \
405 case 0x80 ... 0xff: \
406 /* Illegal character. */ \
407 failure = __GCONV_ILLEGAL_INPUT; \
411 /* Hopefully gcc can recognize that the following `if' is only true \
412 when we reach the default case in the `switch' statement. */ \
413 if (__builtin_expect (failure, __GCONV_OK) == __GCONV_ILLEGAL_INPUT) \
415 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
419 put32 (outptr, ch); \
424 #define LOOP_NEED_FLAGS
425 #define EXTRA_LOOP_DECLS , enum variant var
426 #include <iconv/loop.c>
429 /* Next, define the other direction. */
430 #define MIN_NEEDED_INPUT MIN_NEEDED_TO
431 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
432 #define LOOPFCT TO_LOOP
436 int failure = __GCONV_OK; \
438 ch = get32 (inptr); \
442 if (var == GB || var == ES || var == IT || var == FR || var == FR1 \
444 failure = __GCONV_ILLEGAL_INPUT; \
447 if (var == CN || var == HU || var == CU || var == SE || var == SE2) \
448 failure = __GCONV_ILLEGAL_INPUT; \
451 if (var == CA || var == CA2 || var == DE || var == ES || var == ES2 \
452 || var == IT || var == YU || var == HU || var == FR || var == FR1 \
453 || var == PT || var == PT2 || var == SE2) \
454 failure = __GCONV_ILLEGAL_INPUT; \
457 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
458 || var == ES2 || var == IT || var == JP_OCR_B || var == YU \
459 || var == HU || var == FR || var == FR1 || var == NO \
460 || var == NO2 || var == PT || var == PT2 || var == SE \
462 failure = __GCONV_ILLEGAL_INPUT; \
463 else if (var == CU) \
467 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
468 || var == ES2 || var == IT || var == JP || var == JP_OCR_B \
469 || var == YU || var == KR || var == HU || var == CU || var == FR \
470 || var == FR1 || var == NO || var == NO2 || var == PT \
471 || var == PT2 || var == SE || var == SE2) \
472 failure = __GCONV_ILLEGAL_INPUT; \
475 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
476 || var == ES2 || var == IT || var == JP_OCR_B || var == YU \
477 || var == HU || var == FR || var == FR1 || var == NO \
478 || var == NO2 || var == PT || var == PT2 || var == SE \
480 failure = __GCONV_ILLEGAL_INPUT; \
483 if (var == CA || var == CA2 || var == ES2 || var == YU || var == CU \
485 failure = __GCONV_ILLEGAL_INPUT; \
488 if (var == CA || var == CA2 || var == IT || var == JP_OCR_B \
489 || var == YU || var == HU || var == FR || var == SE2) \
490 failure = __GCONV_ILLEGAL_INPUT; \
493 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
494 || var == ES2 || var == IT || var == YU || var == HU \
495 || var == CU || var == FR || var == FR1 || var == NO \
496 || var == NO2 || var == PT || var == PT2 || var == SE \
498 failure = __GCONV_ILLEGAL_INPUT; \
501 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
502 || var == ES2 || var == IT || var == YU || var == HU || var == CU \
503 || var == FR || var == FR1 || var == NO || var == PT \
504 || var == PT2 || var == SE || var == SE2) \
505 failure = __GCONV_ILLEGAL_INPUT; \
506 else if (var == NO2) \
510 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
511 || var == ES2 || var == IT || var == YU || var == HU || var == CU \
512 || var == FR || var == FR1 || var == NO || var == NO2 \
513 || var == PT || var == PT2 || var == SE || var == SE2) \
514 failure = __GCONV_ILLEGAL_INPUT; \
517 if (var == GB || var == CA || var == CA2 || var == DE || var == ES2 \
518 || var == CN || var == IT || var == JP || var == JP_OCR_B \
519 || var == YU || var == HU || var == CU || var == FR || var == FR1 \
520 || var == NO || var == NO2 || var == PT || var == SE \
522 failure = __GCONV_ILLEGAL_INPUT; \
525 if (var != ES && var != ES2 && var != CU) \
526 failure = __GCONV_ILLEGAL_INPUT; \
530 if (var != GB && var != ES && var != IT && var != FR && var != FR1) \
531 failure = __GCONV_ILLEGAL_INPUT; \
535 if (var != HU && var != CU && var != SE && var != SE2) \
536 failure = __GCONV_ILLEGAL_INPUT; \
542 else if (var == JP || var == JP_OCR_B) \
545 failure = __GCONV_ILLEGAL_INPUT; \
548 if (var == DE || var == ES || var == IT || var == PT) \
550 else if (var == FR || var == FR1) \
552 else if (var == NO2) \
555 failure = __GCONV_ILLEGAL_INPUT; \
558 if (var != ES2 && var != CU && var != FR && var != FR1) \
559 failure = __GCONV_ILLEGAL_INPUT; \
565 else if (var == IT || var == FR || var == FR1) \
567 else if (var == PT) \
570 failure = __GCONV_ILLEGAL_INPUT; \
573 if (var == ES2 || var == CU) \
575 else if (var == PT2) \
578 failure = __GCONV_ILLEGAL_INPUT; \
582 failure = __GCONV_ILLEGAL_INPUT; \
588 else if (var == ES2 || var == CU) \
591 failure = __GCONV_ILLEGAL_INPUT; \
595 failure = __GCONV_ILLEGAL_INPUT; \
599 if (var != PT && var != PT2) \
600 failure = __GCONV_ILLEGAL_INPUT; \
604 if (var != DE && var != SE && var != SE2) \
605 failure = __GCONV_ILLEGAL_INPUT; \
609 if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \
610 failure = __GCONV_ILLEGAL_INPUT; \
614 if (var != DK && var != NO && var != NO2) \
615 failure = __GCONV_ILLEGAL_INPUT; \
621 else if (var == PT || var == PT2) \
624 failure = __GCONV_ILLEGAL_INPUT; \
629 else if (var == HU) \
631 else if (var == SE2) \
634 failure = __GCONV_ILLEGAL_INPUT; \
637 if (var != ES && var != ES2 && var != CU) \
638 failure = __GCONV_ILLEGAL_INPUT; \
642 if (var != PT && var != PT2) \
643 failure = __GCONV_ILLEGAL_INPUT; \
647 if (var != DE && var != HU && var != SE && var != SE2) \
648 failure = __GCONV_ILLEGAL_INPUT; \
652 if (var != DK && var != NO && var != NO2) \
653 failure = __GCONV_ILLEGAL_INPUT; \
657 if (var == DE || var == HU) \
659 else if (var == SE2) \
662 failure = __GCONV_ILLEGAL_INPUT; \
666 failure = __GCONV_ILLEGAL_INPUT; \
670 if (var == CA || var == CA2 || var == FR || var == FR1) \
672 else if (var == IT) \
675 failure = __GCONV_ILLEGAL_INPUT; \
679 failure = __GCONV_ILLEGAL_INPUT; \
683 if (var != CA && var != CA2) \
684 failure = __GCONV_ILLEGAL_INPUT; \
688 if (var != PT && var != PT2) \
689 failure = __GCONV_ILLEGAL_INPUT; \
693 if (var != DE && var != SE && var != SE2) \
694 failure = __GCONV_ILLEGAL_INPUT; \
698 if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \
699 failure = __GCONV_ILLEGAL_INPUT; \
703 if (var != DK && var != NO && var != NO2) \
704 failure = __GCONV_ILLEGAL_INPUT; \
708 if (var == CA || var == CA2 || var == IT || var == FR || var == FR1) \
710 else if (var == ES || var == ES2) \
712 else if (var == PT || var == PT2) \
715 failure = __GCONV_ILLEGAL_INPUT; \
718 if (var != CA && var != CA2 && var != IT && var != FR && var != FR1) \
719 failure = __GCONV_ILLEGAL_INPUT; \
723 if (var == CA || var == CA2 || var == HU || var == FR || var == FR1) \
725 else if (var == IT) \
727 else if (var == SE2) \
730 failure = __GCONV_ILLEGAL_INPUT; \
733 if (var != CA && var != CA2) \
734 failure = __GCONV_ILLEGAL_INPUT; \
739 failure = __GCONV_ILLEGAL_INPUT; \
744 failure = __GCONV_ILLEGAL_INPUT; \
748 if (var != ES && var != ES2 && var != CU) \
749 failure = __GCONV_ILLEGAL_INPUT; \
754 failure = __GCONV_ILLEGAL_INPUT; \
758 if (var != CA && var != CA2) \
759 failure = __GCONV_ILLEGAL_INPUT; \
763 if (var != PT && var != PT2) \
764 failure = __GCONV_ILLEGAL_INPUT; \
768 if (var != DE && var != HU && var != SE && var != SE2) \
769 failure = __GCONV_ILLEGAL_INPUT; \
773 if (var != DK && var != NO && var != NO2) \
774 failure = __GCONV_ILLEGAL_INPUT; \
778 if (var == CA || var == CA2 || var == FR || var == FR1) \
780 else if (var == IT) \
783 failure = __GCONV_ILLEGAL_INPUT; \
786 if (var != CA && var != CA2) \
787 failure = __GCONV_ILLEGAL_INPUT; \
791 if (var == DE || var == HU) \
793 else if (var == SE2) \
796 failure = __GCONV_ILLEGAL_INPUT; \
800 failure = __GCONV_ILLEGAL_INPUT; \
805 failure = __GCONV_ILLEGAL_INPUT; \
810 failure = __GCONV_ILLEGAL_INPUT; \
815 failure = __GCONV_ILLEGAL_INPUT; \
820 failure = __GCONV_ILLEGAL_INPUT; \
825 failure = __GCONV_ILLEGAL_INPUT; \
830 failure = __GCONV_ILLEGAL_INPUT; \
835 failure = __GCONV_ILLEGAL_INPUT; \
840 failure = __GCONV_ILLEGAL_INPUT; \
845 failure = __GCONV_ILLEGAL_INPUT; \
850 failure = __GCONV_ILLEGAL_INPUT; \
855 failure = __GCONV_ILLEGAL_INPUT; \
859 if (var != GB && var != CN && var != JP && var != NO && var != SE) \
860 failure = __GCONV_ILLEGAL_INPUT; \
865 failure = __GCONV_ILLEGAL_INPUT; \
869 if (var != JP_OCR_B) \
870 failure = __GCONV_ILLEGAL_INPUT; \
874 if (var != JP_OCR_B) \
875 failure = __GCONV_ILLEGAL_INPUT; \
879 if (__glibc_unlikely (ch > 0x7f)) \
881 UNICODE_TAG_HANDLER (ch, 4); \
882 failure = __GCONV_ILLEGAL_INPUT; \
887 if (__builtin_expect (failure, __GCONV_OK) == __GCONV_ILLEGAL_INPUT) \
889 STANDARD_TO_LOOP_ERR_HANDLER (4); \
892 *outptr++ = (unsigned char) ch; \
895 #define LOOP_NEED_FLAGS
896 #define EXTRA_LOOP_DECLS , enum variant var
897 #include <iconv/loop.c>
900 /* Now define the toplevel functions. */
901 #include <iconv/skeleton.c>