1 /* Conversion to and from the various ISO 646 CCS.
2 Copyright (C) 1998-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 /* The implementation of the conversions which can be performed by this
20 module is not very sophisticated and not tuned at all. There are
21 zillions of ISO 646 derivatives and supporting each of them in its own
22 module is overkill since these coded character sets are hardly ever
23 used anymore (except ANSI_X3.4-1968 == ASCII, which is compatible
24 with ISO 8859-1). The European variants are superseded by the
25 various ISO 8859-? standards and the Asian variants are embedded in
26 larger character sets. Therefore this implementation is simply
27 here to make it possible to do the conversion if it is necessary.
28 The cost in the gconv-modules file is set to `2' and therefore
29 allows one to easily provide a tuned implementation in case this
30 proves to be necessary. */
38 /* Definitions used in the body of the `gconv' function. */
/* Names for the two conversion loops.  They are plugged in as LOOPFCT
   before each of the two inclusions of <iconv/loop.c> further down.  */
39 #define FROM_LOOP from_ascii
40 #define TO_LOOP to_ascii
/* Sizes on the two sides of the conversion: 1 on the ISO 646 side and 4
   on the other side (the loops use put32/get32 there).  gconv_init
   copies these into the step's __min_needed_* / __max_needed_* fields,
   and the loop sections reuse them as MIN_NEEDED_INPUT and
   MIN_NEEDED_OUTPUT.  */
43 #define MIN_NEEDED_FROM 1
44 #define MIN_NEEDED_TO 4
/* Both a FROM and a TO loop are defined in this file.
   NOTE(review): the exact meaning of this flag is defined by
   <iconv/skeleton.c>, which is not visible here -- confirm there.  */
45 #define ONE_DIRECTION 0
/* True when the step converts from the ISO 646 variant.  Relies on the
   local `dir' that PREPARE_LOOP declares.  */
47 #define FROM_DIRECTION (dir == from_iso646)
/* Declares the locals `dir' and `var', both read from the
   struct iso646_data that step->__data points to (gconv_init stores
   that pointer).  */
48 #define PREPARE_LOOP \
49 enum direction dir = ((struct iso646_data *) step->__data)->dir; \
50 enum variant var = ((struct iso646_data *) step->__data)->var;
/* Extra argument list fragment carrying `var'; it pairs with the
   `, enum variant var' parameter added through EXTRA_LOOP_DECLS.
   NOTE(review): presumably consumed by <iconv/skeleton.c> when it calls
   the loop functions -- confirm there.  */
51 #define EXTRA_LOOP_ARGS , var
54 /* Direction of the transformation. */
62 /* See names below, must be in the same order. */
66 CA
, /* CSA_Z243.4-1985-1 */
67 CA2
, /* CSA_Z243.4-1985-2 */
74 JP
, /* JIS_C6220-1969-RO */
75 JP_OCR_B
, /* JIS_C6229-1984-B */
76 YU
, /* JUS_I.B1.002 */
81 FR1
, /* NF_Z_62-010_(1973) */
86 SE
, /* SEN_850200_B */
87 SE2
/* SEN_850200_C */
90 /* Must be in the same order as enum variant above. */
91 static const char names
[] =
93 "CSA_Z243.4-1985-1//\0"
94 "CSA_Z243.4-1985-2//\0"
101 "JIS_C6220-1969-RO//\0"
102 "JIS_C6229-1984-B//\0"
108 "NF_Z_62-010_1973//\0" /* Note that we don't have the parenthesis in
/* Initialization hook of this conversion module.

   Walks the NUL-separated `names' list and compares each entry, ignoring
   case, first against step->__from_name and then against
   step->__to_name.  After the direction check a struct iso646_data is
   allocated and stored in step->__data, the __min_needed_* and
   __max_needed_* fields of STEP are filled in according to the
   direction, and __stateful is cleared.

   `result' is set to __GCONV_NOCONV before the direction check and to
   __GCONV_NOMEM before the allocation check.

   NOTE(review): the statements that record a match in `dir'/`var', the
   braces, and the final handling of `result' are not visible in this
   excerpt; verify the exact nesting against the full file.  */
125 extern int gconv_init (struct __gconv_step
*step
);
127 gconv_init (struct __gconv_step
*step
)
129 /* Determine which direction. */
130 struct iso646_data
*new_data
;
131 enum direction dir
= illegal_dir
;
134 enum variant var
= 0;
/* Every entry in `names' ends with a NUL byte: step to the byte after
   that NUL until an empty entry (a lone NUL) is reached.  */
135 for (const char *name
= names
; *name
!= '\0';
136 name
= __rawmemchr (name
, '\0') + 1)
138 if (__strcasecmp (step
->__from_name
, name
) == 0)
143 else if (__strcasecmp (step
->__to_name
, name
) == 0)
151 result
= __GCONV_NOCONV
;
/* __builtin_expect only hints that `dir' is expected to equal
   from_iso646; the condition actually tested is dir != illegal_dir.  */
152 if (__builtin_expect (dir
, from_iso646
) != illegal_dir
)
154 new_data
= (struct iso646_data
*) malloc (sizeof (struct iso646_data
));
156 result
= __GCONV_NOMEM
;
157 if (new_data
!= NULL
)
161 step
->__data
= new_data
;
/* Converting from the ISO 646 variant: MIN_NEEDED_FROM is the input
   size and MIN_NEEDED_TO the output size.  */
163 if (dir
== from_iso646
)
165 step
->__min_needed_from
= MIN_NEEDED_FROM
;
166 step
->__max_needed_from
= MIN_NEEDED_FROM
;
167 step
->__min_needed_to
= MIN_NEEDED_TO
;
168 step
->__max_needed_to
= MIN_NEEDED_TO
;
/* Same four fields with the two sizes swapped.
   NOTE(review): presumably the `else' branch for the opposite
   direction; the `else' itself is not visible in this excerpt.  */
172 step
->__min_needed_from
= MIN_NEEDED_TO
;
173 step
->__max_needed_from
= MIN_NEEDED_TO
;
174 step
->__min_needed_to
= MIN_NEEDED_FROM
;
175 step
->__max_needed_to
= MIN_NEEDED_FROM
;
178 step
->__stateful
= 0;
188 extern void gconv_end (struct __gconv_step
*data
);
190 gconv_end (struct __gconv_step
*data
)
196 /* First define the conversion function from ASCII to UCS4. */
/* Parameters for the first inclusion of <iconv/loop.c>: the input side
   uses MIN_NEEDED_FROM, the output side MIN_NEEDED_TO, and LOOPFCT is
   set to FROM_LOOP.
   NOTE(review): LOOPFCT is presumably the name loop.c gives to the
   generated function -- confirm in loop.c.  */
197 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
198 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
199 #define LOOPFCT FROM_LOOP
203 int failure = __GCONV_OK; \
209 if (var == GB || var == ES || var == IT || var == FR || var == FR1) \
211 else if (var == NO2) \
217 else if (var == HU || var == CU || var == SE || var == SE2) \
221 if (var == CA || var == CA2 || var == FR || var == FR1) \
223 else if (var == DE || var == ES || var == IT || var == PT) \
225 else if (var == ES2) \
227 else if (var == YU) \
229 else if (var == HU) \
231 else if (var == PT2) \
233 else if (var == SE2) \
237 if (var == CA || var == CA2) \
239 else if (var == DE || var == SE || var == SE2) \
241 else if (var == DK || var == NO || var == NO2) \
243 else if (var == ES || var == ES2 || var == CU) \
245 else if (var == IT || var == FR || var == FR1) \
247 else if (var == JP_OCR_B) \
249 else if (var == YU) \
251 else if (var == HU) \
253 else if (var == PT || var == PT2) \
257 if (var == CA || var == CA2 || var == IT || var == FR || var == FR1) \
259 else if (var == DE || var == HU || var == SE || var == SE2) \
261 else if (var == DK || var == NO || var == NO2) \
263 else if (var == ES || var == ES2 || var == CU) \
265 else if (var == JP || var == JP_OCR_B) \
267 else if (var == YU) \
269 else if (var == KR) \
271 else if (var == PT || var == PT2) \
275 if (var == CA || var == CA2) \
277 else if (var == DE || var == HU) \
279 else if (var == DK || var == NO || var == NO2 || var == SE \
282 else if (var == ES) \
284 else if (var == ES2) \
286 else if (var == IT) \
288 else if (var == JP_OCR_B) \
290 else if (var == YU) \
292 else if (var == FR || var == FR1) \
294 else if (var == PT || var == PT2) \
300 else if (var == CA2) \
302 else if (var == ES2 || var == CU) \
304 else if (var == YU) \
306 else if (var == SE2) \
310 if (var == CA || var == CA2) \
312 else if (var == IT) \
314 else if (var == JP_OCR_B) \
315 /* Illegal character. */ \
316 failure = __GCONV_ILLEGAL_INPUT; \
317 else if (var == YU) \
319 else if (var == HU) \
321 else if (var == FR) \
323 else if (var == SE2) \
327 if (var == CA || var == CA2 || var == HU || var == FR || var == FR1) \
329 else if (var == DE || var == SE || var == SE2) \
331 else if (var == DK || var == NO || var == NO2) \
333 else if (var == ES) \
335 else if (var == ES2 || var == CU) \
337 else if (var == IT) \
339 else if (var == YU) \
341 else if (var == PT || var == PT2) \
345 if (var == CA || var == CA2 || var == FR || var == FR1) \
347 else if (var == DE || var == HU || var == SE || var == SE2) \
349 else if (var == DK || var == NO || var == NO2) \
351 else if (var == ES || var == ES2 || var == CU) \
353 else if (var == IT) \
355 else if (var == YU) \
357 else if (var == PT || var == PT2) \
361 if (var == CA || var == CA2 || var == IT || var == FR || var == FR1) \
363 else if (var == DE || var == HU) \
365 else if (var == DK || var == NO || var == NO2 || var == SE \
368 else if (var == ES || var == ES2) \
370 else if (var == YU) \
372 else if (var == CU) \
374 else if (var == PT || var == PT2) \
378 if (var == GB || var == CN || var == JP || var == NO || var == SE) \
380 else if (var == CA || var == CA2) \
382 else if (var == DE) \
384 else if (var == ES2 || var == CU || var == FR || var == FR1) \
386 else if (var == IT) \
388 else if (var == JP_OCR_B) \
389 /* Illegal character. */ \
390 failure = __GCONV_ILLEGAL_INPUT; \
391 else if (var == YU) \
393 else if (var == HU) \
395 else if (var == NO2) \
397 else if (var == PT) \
399 else if (var == SE2) \
404 case 0x80 ... 0xff: \
405 /* Illegal character. */ \
406 failure = __GCONV_ILLEGAL_INPUT; \
410 /* Hopefully gcc can recognize that the following `if' is only true \
411 when we reach the default case in the `switch' statement. */ \
412 if (__builtin_expect (failure, __GCONV_OK) == __GCONV_ILLEGAL_INPUT) \
414 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
418 put32 (outptr, ch); \
423 #define LOOP_NEED_FLAGS
424 #define EXTRA_LOOP_DECLS , enum variant var
425 #include <iconv/loop.c>
428 /* Next, define the other direction. */
/* Parameters for the second inclusion of <iconv/loop.c>, mirroring the
   first: the input side now uses MIN_NEEDED_TO, the output side
   MIN_NEEDED_FROM, and LOOPFCT is set to TO_LOOP.
   NOTE(review): LOOPFCT is presumably the name loop.c gives to the
   generated function -- confirm in loop.c.  */
429 #define MIN_NEEDED_INPUT MIN_NEEDED_TO
430 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
431 #define LOOPFCT TO_LOOP
435 int failure = __GCONV_OK; \
437 ch = get32 (inptr); \
441 if (var == GB || var == ES || var == IT || var == FR || var == FR1 \
443 failure = __GCONV_ILLEGAL_INPUT; \
446 if (var == CN || var == HU || var == CU || var == SE || var == SE2) \
447 failure = __GCONV_ILLEGAL_INPUT; \
450 if (var == CA || var == CA2 || var == DE || var == ES || var == ES2 \
451 || var == IT || var == YU || var == HU || var == FR || var == FR1 \
452 || var == PT || var == PT2 || var == SE2) \
453 failure = __GCONV_ILLEGAL_INPUT; \
456 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
457 || var == ES2 || var == IT || var == JP_OCR_B || var == YU \
458 || var == HU || var == FR || var == FR1 || var == NO \
459 || var == NO2 || var == PT || var == PT2 || var == SE \
461 failure = __GCONV_ILLEGAL_INPUT; \
462 else if (var == CU) \
466 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
467 || var == ES2 || var == IT || var == JP || var == JP_OCR_B \
468 || var == YU || var == KR || var == HU || var == CU || var == FR \
469 || var == FR1 || var == NO || var == NO2 || var == PT \
470 || var == PT2 || var == SE || var == SE2) \
471 failure = __GCONV_ILLEGAL_INPUT; \
474 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
475 || var == ES2 || var == IT || var == JP_OCR_B || var == YU \
476 || var == HU || var == FR || var == FR1 || var == NO \
477 || var == NO2 || var == PT || var == PT2 || var == SE \
479 failure = __GCONV_ILLEGAL_INPUT; \
482 if (var == CA || var == CA2 || var == ES2 || var == YU || var == CU \
484 failure = __GCONV_ILLEGAL_INPUT; \
487 if (var == CA || var == CA2 || var == IT || var == JP_OCR_B \
488 || var == YU || var == HU || var == FR || var == SE2) \
489 failure = __GCONV_ILLEGAL_INPUT; \
492 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
493 || var == ES2 || var == IT || var == YU || var == HU \
494 || var == CU || var == FR || var == FR1 || var == NO \
495 || var == NO2 || var == PT || var == PT2 || var == SE \
497 failure = __GCONV_ILLEGAL_INPUT; \
500 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
501 || var == ES2 || var == IT || var == YU || var == HU || var == CU \
502 || var == FR || var == FR1 || var == NO || var == PT \
503 || var == PT2 || var == SE || var == SE2) \
504 failure = __GCONV_ILLEGAL_INPUT; \
505 else if (var == NO2) \
509 if (var == CA || var == CA2 || var == DE || var == DK || var == ES \
510 || var == ES2 || var == IT || var == YU || var == HU || var == CU \
511 || var == FR || var == FR1 || var == NO || var == NO2 \
512 || var == PT || var == PT2 || var == SE || var == SE2) \
513 failure = __GCONV_ILLEGAL_INPUT; \
516 if (var == GB || var == CA || var == CA2 || var == DE || var == ES2 \
517 || var == CN || var == IT || var == JP || var == JP_OCR_B \
518 || var == YU || var == HU || var == CU || var == FR || var == FR1 \
519 || var == NO || var == NO2 || var == PT || var == SE \
521 failure = __GCONV_ILLEGAL_INPUT; \
524 if (var != ES && var != ES2 && var != CU) \
525 failure = __GCONV_ILLEGAL_INPUT; \
529 if (var != GB && var != ES && var != IT && var != FR && var != FR1) \
530 failure = __GCONV_ILLEGAL_INPUT; \
534 if (var != HU && var != CU && var != SE && var != SE2) \
535 failure = __GCONV_ILLEGAL_INPUT; \
541 else if (var == JP || var == JP_OCR_B) \
544 failure = __GCONV_ILLEGAL_INPUT; \
547 if (var == DE || var == ES || var == IT || var == PT) \
549 else if (var == FR || var == FR1) \
551 else if (var == NO2) \
554 failure = __GCONV_ILLEGAL_INPUT; \
557 if (var != ES2 && var != CU && var != FR && var != FR1) \
558 failure = __GCONV_ILLEGAL_INPUT; \
564 else if (var == IT || var == FR || var == FR1) \
566 else if (var == PT) \
569 failure = __GCONV_ILLEGAL_INPUT; \
572 if (var == ES2 || var == CU) \
574 else if (var == PT2) \
577 failure = __GCONV_ILLEGAL_INPUT; \
581 failure = __GCONV_ILLEGAL_INPUT; \
587 else if (var == ES2 || var == CU) \
590 failure = __GCONV_ILLEGAL_INPUT; \
594 failure = __GCONV_ILLEGAL_INPUT; \
598 if (var != PT && var != PT2) \
599 failure = __GCONV_ILLEGAL_INPUT; \
603 if (var != DE && var != SE && var != SE2) \
604 failure = __GCONV_ILLEGAL_INPUT; \
608 if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \
609 failure = __GCONV_ILLEGAL_INPUT; \
613 if (var != DK && var != NO && var != NO2) \
614 failure = __GCONV_ILLEGAL_INPUT; \
620 else if (var == PT || var == PT2) \
623 failure = __GCONV_ILLEGAL_INPUT; \
628 else if (var == HU) \
630 else if (var == SE2) \
633 failure = __GCONV_ILLEGAL_INPUT; \
636 if (var != ES && var != ES2 && var != CU) \
637 failure = __GCONV_ILLEGAL_INPUT; \
641 if (var != PT && var != PT2) \
642 failure = __GCONV_ILLEGAL_INPUT; \
646 if (var != DE && var != HU && var != SE && var != SE2) \
647 failure = __GCONV_ILLEGAL_INPUT; \
651 if (var != DK && var != NO && var != NO2) \
652 failure = __GCONV_ILLEGAL_INPUT; \
656 if (var == DE || var == HU) \
658 else if (var == SE2) \
661 failure = __GCONV_ILLEGAL_INPUT; \
665 failure = __GCONV_ILLEGAL_INPUT; \
669 if (var == CA || var == CA2 || var == FR || var == FR1) \
671 else if (var == IT) \
674 failure = __GCONV_ILLEGAL_INPUT; \
678 failure = __GCONV_ILLEGAL_INPUT; \
682 if (var != CA && var != CA2) \
683 failure = __GCONV_ILLEGAL_INPUT; \
687 if (var != PT && var != PT2) \
688 failure = __GCONV_ILLEGAL_INPUT; \
692 if (var != DE && var != SE && var != SE2) \
693 failure = __GCONV_ILLEGAL_INPUT; \
697 if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \
698 failure = __GCONV_ILLEGAL_INPUT; \
702 if (var != DK && var != NO && var != NO2) \
703 failure = __GCONV_ILLEGAL_INPUT; \
707 if (var == CA || var == CA2 || var == IT || var == FR || var == FR1) \
709 else if (var == ES || var == ES2) \
711 else if (var == PT || var == PT2) \
714 failure = __GCONV_ILLEGAL_INPUT; \
717 if (var != CA && var != CA2 && var != IT && var != FR && var != FR1) \
718 failure = __GCONV_ILLEGAL_INPUT; \
722 if (var == CA || var == CA2 || var == HU || var == FR || var == FR1) \
724 else if (var == IT) \
726 else if (var == SE2) \
729 failure = __GCONV_ILLEGAL_INPUT; \
732 if (var != CA && var != CA2) \
733 failure = __GCONV_ILLEGAL_INPUT; \
738 failure = __GCONV_ILLEGAL_INPUT; \
743 failure = __GCONV_ILLEGAL_INPUT; \
747 if (var != ES && var != ES2 && var != CU) \
748 failure = __GCONV_ILLEGAL_INPUT; \
753 failure = __GCONV_ILLEGAL_INPUT; \
757 if (var != CA && var != CA2) \
758 failure = __GCONV_ILLEGAL_INPUT; \
762 if (var != PT && var != PT2) \
763 failure = __GCONV_ILLEGAL_INPUT; \
767 if (var != DE && var != HU && var != SE && var != SE2) \
768 failure = __GCONV_ILLEGAL_INPUT; \
772 if (var != DK && var != NO && var != NO2) \
773 failure = __GCONV_ILLEGAL_INPUT; \
777 if (var == CA || var == CA2 || var == FR || var == FR1) \
779 else if (var == IT) \
782 failure = __GCONV_ILLEGAL_INPUT; \
785 if (var != CA && var != CA2) \
786 failure = __GCONV_ILLEGAL_INPUT; \
790 if (var == DE || var == HU) \
792 else if (var == SE2) \
795 failure = __GCONV_ILLEGAL_INPUT; \
799 failure = __GCONV_ILLEGAL_INPUT; \
804 failure = __GCONV_ILLEGAL_INPUT; \
809 failure = __GCONV_ILLEGAL_INPUT; \
814 failure = __GCONV_ILLEGAL_INPUT; \
819 failure = __GCONV_ILLEGAL_INPUT; \
824 failure = __GCONV_ILLEGAL_INPUT; \
829 failure = __GCONV_ILLEGAL_INPUT; \
834 failure = __GCONV_ILLEGAL_INPUT; \
839 failure = __GCONV_ILLEGAL_INPUT; \
844 failure = __GCONV_ILLEGAL_INPUT; \
849 failure = __GCONV_ILLEGAL_INPUT; \
854 failure = __GCONV_ILLEGAL_INPUT; \
858 if (var != GB && var != CN && var != JP && var != NO && var != SE) \
859 failure = __GCONV_ILLEGAL_INPUT; \
864 failure = __GCONV_ILLEGAL_INPUT; \
868 if (var != JP_OCR_B) \
869 failure = __GCONV_ILLEGAL_INPUT; \
873 if (var != JP_OCR_B) \
874 failure = __GCONV_ILLEGAL_INPUT; \
878 if (__glibc_unlikely (ch > 0x7f)) \
880 UNICODE_TAG_HANDLER (ch, 4); \
881 failure = __GCONV_ILLEGAL_INPUT; \
886 if (__builtin_expect (failure, __GCONV_OK) == __GCONV_ILLEGAL_INPUT) \
888 STANDARD_TO_LOOP_ERR_HANDLER (4); \
891 *outptr++ = (unsigned char) ch; \
894 #define LOOP_NEED_FLAGS
895 #define EXTRA_LOOP_DECLS , enum variant var
896 #include <iconv/loop.c>
899 /* Now define the toplevel functions. */
900 #include <iconv/skeleton.c>