1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 #include <sys/param.h>
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 __const unsigned char **, __const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
42 # define EILSEQ EINVAL
46 /* Transform from the internal, UCS4-like format, to UCS4. The
47 difference between the internal ucs4 format and the real UCS4
48 format is, if any, the endianness. Unicode/ISO 10646 specifies that
49 unless some higher protocol specifies it differently, the byte
50 order is big endian. */
53 #define MIN_NEEDED_FROM 4
54 #define MIN_NEEDED_TO 4
55 #define FROM_DIRECTION 1
56 #define FROM_LOOP internal_ucs4_loop
57 #define TO_LOOP internal_ucs4_loop /* This is not used. */
58 #define FUNCTION_NAME __gconv_transform_internal_ucs4
62 internal_ucs4_loop (struct __gconv_step
*step
,
63 struct __gconv_step_data
*step_data
,
64 const unsigned char **inptrp
, const unsigned char *inend
,
65 unsigned char **outptrp
, unsigned char *outend
,
68 const unsigned char *inptr
= *inptrp
;
69 unsigned char *outptr
= *outptrp
;
70 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
73 #if __BYTE_ORDER == __LITTLE_ENDIAN
74 /* Sigh, we have to do some real work. */
77 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
78 *((uint32_t *) outptr
)++ = bswap_32 (*(const uint32_t *) inptr
);
82 #elif __BYTE_ORDER == __BIG_ENDIAN
83 /* Simply copy the data. */
84 *inptrp
= inptr
+ n_convert
* 4;
85 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
87 # error "This endianess is not supported."
90 /* Determine the status. */
92 result
= __GCONV_EMPTY_INPUT
;
93 else if (*outptrp
+ 4 > outend
)
94 result
= __GCONV_FULL_OUTPUT
;
96 result
= __GCONV_INCOMPLETE_INPUT
;
101 #ifndef _STRING_ARCH_unaligned
103 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
104 struct __gconv_step_data
*step_data
,
105 const unsigned char **inptrp
,
106 const unsigned char *inend
,
107 unsigned char **outptrp
, unsigned char *outend
,
108 size_t *irreversible
)
110 const unsigned char *inptr
= *inptrp
;
111 unsigned char *outptr
= *outptrp
;
112 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
115 # if __BYTE_ORDER == __LITTLE_ENDIAN
116 /* Sigh, we have to do some real work. */
119 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
121 outptr
[0] = inptr
[3];
122 outptr
[1] = inptr
[2];
123 outptr
[2] = inptr
[1];
124 outptr
[3] = inptr
[0];
129 # elif __BYTE_ORDER == __BIG_ENDIAN
130 /* Simply copy the data. */
131 *inptrp
= inptr
+ n_convert
* 4;
132 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
134 # error "This endianess is not supported."
137 /* Determine the status. */
138 if (*inptrp
== inend
)
139 result
= __GCONV_EMPTY_INPUT
;
140 else if (*outptrp
+ 4 > outend
)
141 result
= __GCONV_FULL_OUTPUT
;
143 result
= __GCONV_INCOMPLETE_INPUT
;
151 internal_ucs4_loop_single (struct __gconv_step
*step
,
152 struct __gconv_step_data
*step_data
,
153 const unsigned char **inptrp
,
154 const unsigned char *inend
,
155 unsigned char **outptrp
, unsigned char *outend
,
156 size_t *irreversible
)
158 mbstate_t *state
= step_data
->__statep
;
159 size_t cnt
= state
->__count
& 7;
161 while (*inptrp
< inend
&& cnt
< 4)
162 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
164 if (__builtin_expect (cnt
< 4, 0))
166 /* Still not enough bytes. Store the ones in the input buffer. */
167 state
->__count
&= ~7;
168 state
->__count
|= cnt
;
170 return __GCONV_INCOMPLETE_INPUT
;
173 #if __BYTE_ORDER == __LITTLE_ENDIAN
174 (*outptrp
)[0] = state
->__value
.__wchb
[3];
175 (*outptrp
)[1] = state
->__value
.__wchb
[2];
176 (*outptrp
)[2] = state
->__value
.__wchb
[1];
177 (*outptrp
)[3] = state
->__value
.__wchb
[0];
180 #elif __BYTE_ORDER == __BIG_ENDIAN
182 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
184 # error "This endianess is not supported."
187 /* Clear the state buffer. */
188 state
->__count
&= ~7;
193 #include <iconv/skeleton.c>
196 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
197 for the other direction we have to check for correct values here. */
198 #define DEFINE_INIT 0
199 #define DEFINE_FINI 0
200 #define MIN_NEEDED_FROM 4
201 #define MIN_NEEDED_TO 4
202 #define FROM_DIRECTION 1
203 #define FROM_LOOP ucs4_internal_loop
204 #define TO_LOOP ucs4_internal_loop /* This is not used. */
205 #define FUNCTION_NAME __gconv_transform_ucs4_internal
209 ucs4_internal_loop (struct __gconv_step
*step
,
210 struct __gconv_step_data
*step_data
,
211 const unsigned char **inptrp
, const unsigned char *inend
,
212 unsigned char **outptrp
, unsigned char *outend
,
213 size_t *irreversible
)
215 int flags
= step_data
->__flags
;
216 const unsigned char *inptr
= *inptrp
;
217 unsigned char *outptr
= *outptrp
;
218 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
222 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
226 #if __BYTE_ORDER == __LITTLE_ENDIAN
227 inval
= bswap_32 (*(const uint32_t *) inptr
);
229 inval
= *(const uint32_t *) inptr
;
232 if (__builtin_expect (inval
> 0x7fffffff, 0))
234 /* The value is too large. We don't try transliteration here since
235 this is not an error because of the lack of possibilities to
236 represent the result. This is a genuine bug in the input since
237 UCS4 does not allow such values. */
238 if (irreversible
== NULL
)
239 /* We are transliterating, don't try to correct anything. */
240 return __GCONV_ILLEGAL_INPUT
;
242 if (flags
& __GCONV_IGNORE_ERRORS
)
244 /* Just ignore this character. */
251 return __GCONV_ILLEGAL_INPUT
;
254 *((uint32_t *) outptr
)++ = inval
;
260 /* Determine the status. */
261 if (*inptrp
== inend
)
262 result
= __GCONV_EMPTY_INPUT
;
263 else if (*outptrp
+ 4 > outend
)
264 result
= __GCONV_FULL_OUTPUT
;
266 result
= __GCONV_INCOMPLETE_INPUT
;
271 #ifndef _STRING_ARCH_unaligned
273 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
274 struct __gconv_step_data
*step_data
,
275 const unsigned char **inptrp
,
276 const unsigned char *inend
,
277 unsigned char **outptrp
, unsigned char *outend
,
278 size_t *irreversible
)
280 int flags
= step_data
->__flags
;
281 const unsigned char *inptr
= *inptrp
;
282 unsigned char *outptr
= *outptrp
;
283 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
287 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
289 if (__builtin_expect (inptr
[0] > 0x80, 0))
291 /* The value is too large. We don't try transliteration here since
292 this is not an error because of the lack of possibilities to
293 represent the result. This is a genuine bug in the input since
294 UCS4 does not allow such values. */
295 if (irreversible
== NULL
)
296 /* We are transliterating, don't try to correct anything. */
297 return __GCONV_ILLEGAL_INPUT
;
299 if (flags
& __GCONV_IGNORE_ERRORS
)
301 /* Just ignore this character. */
308 return __GCONV_ILLEGAL_INPUT
;
311 # if __BYTE_ORDER == __LITTLE_ENDIAN
312 outptr
[3] = inptr
[0];
313 outptr
[2] = inptr
[1];
314 outptr
[1] = inptr
[2];
315 outptr
[0] = inptr
[3];
317 outptr
[0] = inptr
[0];
318 outptr
[1] = inptr
[1];
319 outptr
[2] = inptr
[2];
320 outptr
[3] = inptr
[3];
328 /* Determine the status. */
329 if (*inptrp
== inend
)
330 result
= __GCONV_EMPTY_INPUT
;
331 else if (*outptrp
+ 4 > outend
)
332 result
= __GCONV_FULL_OUTPUT
;
334 result
= __GCONV_INCOMPLETE_INPUT
;
342 ucs4_internal_loop_single (struct __gconv_step
*step
,
343 struct __gconv_step_data
*step_data
,
344 const unsigned char **inptrp
,
345 const unsigned char *inend
,
346 unsigned char **outptrp
, unsigned char *outend
,
347 size_t *irreversible
)
349 mbstate_t *state
= step_data
->__statep
;
350 int flags
= step_data
->__flags
;
351 size_t cnt
= state
->__count
& 7;
353 while (*inptrp
< inend
&& cnt
< 4)
354 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
356 if (__builtin_expect (cnt
< 4, 0))
358 /* Still not enough bytes. Store the ones in the input buffer. */
359 state
->__count
&= ~7;
360 state
->__count
|= cnt
;
362 return __GCONV_INCOMPLETE_INPUT
;
365 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x80,
368 /* The value is too large. We don't try transliteration here since
369 this is not an error because of the lack of possibilities to
370 represent the result. This is a genuine bug in the input since
371 UCS4 does not allow such values. */
372 if (!(flags
& __GCONV_IGNORE_ERRORS
))
374 *inptrp
-= cnt
- (state
->__count
& 7);
375 return __GCONV_ILLEGAL_INPUT
;
380 #if __BYTE_ORDER == __LITTLE_ENDIAN
381 (*outptrp
)[0] = state
->__value
.__wchb
[3];
382 (*outptrp
)[1] = state
->__value
.__wchb
[2];
383 (*outptrp
)[2] = state
->__value
.__wchb
[1];
384 (*outptrp
)[3] = state
->__value
.__wchb
[0];
385 #elif __BYTE_ORDER == __BIG_ENDIAN
386 (*outptrp
)[0] = state
->__value
.__wchb
[0];
387 (*outptrp
)[1] = state
->__value
.__wchb
[1];
388 (*outptrp
)[2] = state
->__value
.__wchb
[2];
389 (*outptrp
)[3] = state
->__value
.__wchb
[3];
395 /* Clear the state buffer. */
396 state
->__count
&= ~7;
401 #include <iconv/skeleton.c>
404 /* Similarly for the little endian form. */
405 #define DEFINE_INIT 0
406 #define DEFINE_FINI 0
407 #define MIN_NEEDED_FROM 4
408 #define MIN_NEEDED_TO 4
409 #define FROM_DIRECTION 1
410 #define FROM_LOOP internal_ucs4le_loop
411 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
412 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
416 internal_ucs4le_loop (struct __gconv_step
*step
,
417 struct __gconv_step_data
*step_data
,
418 const unsigned char **inptrp
, const unsigned char *inend
,
419 unsigned char **outptrp
, unsigned char *outend
,
420 size_t *irreversible
)
422 const unsigned char *inptr
= *inptrp
;
423 unsigned char *outptr
= *outptrp
;
424 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
427 #if __BYTE_ORDER == __BIG_ENDIAN
428 /* Sigh, we have to do some real work. */
431 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
432 *((uint32_t *) outptr
)++ = bswap_32 (*(const uint32_t *) inptr
);
436 #elif __BYTE_ORDER == __LITTLE_ENDIAN
437 /* Simply copy the data. */
438 *inptrp
= inptr
+ n_convert
* 4;
439 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
441 # error "This endianess is not supported."
444 /* Determine the status. */
445 if (*inptrp
== inend
)
446 result
= __GCONV_EMPTY_INPUT
;
447 else if (*outptrp
+ 4 > outend
)
448 result
= __GCONV_FULL_OUTPUT
;
450 result
= __GCONV_INCOMPLETE_INPUT
;
455 #ifndef _STRING_ARCH_unaligned
457 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
458 struct __gconv_step_data
*step_data
,
459 const unsigned char **inptrp
,
460 const unsigned char *inend
,
461 unsigned char **outptrp
, unsigned char *outend
,
462 size_t *irreversible
)
464 const unsigned char *inptr
= *inptrp
;
465 unsigned char *outptr
= *outptrp
;
466 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
469 # if __BYTE_ORDER == __BIG_ENDIAN
470 /* Sigh, we have to do some real work. */
473 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
475 outptr
[0] = inptr
[3];
476 outptr
[1] = inptr
[2];
477 outptr
[2] = inptr
[1];
478 outptr
[3] = inptr
[0];
483 # elif __BYTE_ORDER == __LITTLE_ENDIAN
484 /* Simply copy the data. */
485 *inptrp
= inptr
+ n_convert
* 4;
486 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
488 # error "This endianess is not supported."
491 /* Determine the status. */
492 if (*inptrp
+ 4 > inend
)
493 result
= __GCONV_EMPTY_INPUT
;
494 else if (*outptrp
+ 4 > outend
)
495 result
= __GCONV_FULL_OUTPUT
;
497 result
= __GCONV_INCOMPLETE_INPUT
;
505 internal_ucs4le_loop_single (struct __gconv_step
*step
,
506 struct __gconv_step_data
*step_data
,
507 const unsigned char **inptrp
,
508 const unsigned char *inend
,
509 unsigned char **outptrp
, unsigned char *outend
,
510 size_t *irreversible
)
512 mbstate_t *state
= step_data
->__statep
;
513 size_t cnt
= state
->__count
& 7;
515 while (*inptrp
< inend
&& cnt
< 4)
516 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
518 if (__builtin_expect (cnt
< 4, 0))
520 /* Still not enough bytes. Store the ones in the input buffer. */
521 state
->__count
&= ~7;
522 state
->__count
|= cnt
;
524 return __GCONV_INCOMPLETE_INPUT
;
527 #if __BYTE_ORDER == __BIG_ENDIAN
528 (*outptrp
)[0] = state
->__value
.__wchb
[3];
529 (*outptrp
)[1] = state
->__value
.__wchb
[2];
530 (*outptrp
)[2] = state
->__value
.__wchb
[1];
531 (*outptrp
)[3] = state
->__value
.__wchb
[0];
536 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
539 /* Clear the state buffer. */
540 state
->__count
&= ~7;
545 #include <iconv/skeleton.c>
548 /* And finally from UCS4-LE to the internal encoding. */
549 #define DEFINE_INIT 0
550 #define DEFINE_FINI 0
551 #define MIN_NEEDED_FROM 4
552 #define MIN_NEEDED_TO 4
553 #define FROM_DIRECTION 1
554 #define FROM_LOOP ucs4le_internal_loop
555 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
556 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
560 ucs4le_internal_loop (struct __gconv_step
*step
,
561 struct __gconv_step_data
*step_data
,
562 const unsigned char **inptrp
, const unsigned char *inend
,
563 unsigned char **outptrp
, unsigned char *outend
,
564 size_t *irreversible
)
566 int flags
= step_data
->__flags
;
567 const unsigned char *inptr
= *inptrp
;
568 unsigned char *outptr
= *outptrp
;
569 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
573 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
577 #if __BYTE_ORDER == __BIG_ENDIAN
578 inval
= bswap_32 (*(const uint32_t *) inptr
);
580 inval
= *(const uint32_t *) inptr
;
583 if (__builtin_expect (inval
> 0x7fffffff, 0))
585 /* The value is too large. We don't try transliteration here since
586 this is not an error because of the lack of possibilities to
587 represent the result. This is a genuine bug in the input since
588 UCS4 does not allow such values. */
589 if (irreversible
== NULL
)
590 /* We are transliterating, don't try to correct anything. */
591 return __GCONV_ILLEGAL_INPUT
;
593 if (flags
& __GCONV_IGNORE_ERRORS
)
595 /* Just ignore this character. */
600 return __GCONV_ILLEGAL_INPUT
;
603 *((uint32_t *) outptr
)++ = inval
;
609 /* Determine the status. */
610 if (*inptrp
== inend
)
611 result
= __GCONV_EMPTY_INPUT
;
612 else if (*outptrp
+ 4 > outend
)
613 result
= __GCONV_FULL_OUTPUT
;
615 result
= __GCONV_INCOMPLETE_INPUT
;
620 #ifndef _STRING_ARCH_unaligned
622 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
623 struct __gconv_step_data
*step_data
,
624 const unsigned char **inptrp
,
625 const unsigned char *inend
,
626 unsigned char **outptrp
, unsigned char *outend
,
627 size_t *irreversible
)
629 int flags
= step_data
->__flags
;
630 const unsigned char *inptr
= *inptrp
;
631 unsigned char *outptr
= *outptrp
;
632 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
636 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
638 if (__builtin_expect (inptr
[3] > 0x80, 0))
640 /* The value is too large. We don't try transliteration here since
641 this is not an error because of the lack of possibilities to
642 represent the result. This is a genuine bug in the input since
643 UCS4 does not allow such values. */
644 if (irreversible
== NULL
)
645 /* We are transliterating, don't try to correct anything. */
646 return __GCONV_ILLEGAL_INPUT
;
648 if (flags
& __GCONV_IGNORE_ERRORS
)
650 /* Just ignore this character. */
657 return __GCONV_ILLEGAL_INPUT
;
660 # if __BYTE_ORDER == __BIG_ENDIAN
661 outptr
[3] = inptr
[0];
662 outptr
[2] = inptr
[1];
663 outptr
[1] = inptr
[2];
664 outptr
[0] = inptr
[3];
666 outptr
[0] = inptr
[0];
667 outptr
[1] = inptr
[1];
668 outptr
[2] = inptr
[2];
669 outptr
[3] = inptr
[3];
678 /* Determine the status. */
679 if (*inptrp
== inend
)
680 result
= __GCONV_EMPTY_INPUT
;
681 else if (*outptrp
+ 4 > outend
)
682 result
= __GCONV_FULL_OUTPUT
;
684 result
= __GCONV_INCOMPLETE_INPUT
;
692 ucs4le_internal_loop_single (struct __gconv_step
*step
,
693 struct __gconv_step_data
*step_data
,
694 const unsigned char **inptrp
,
695 const unsigned char *inend
,
696 unsigned char **outptrp
, unsigned char *outend
,
697 size_t *irreversible
)
699 mbstate_t *state
= step_data
->__statep
;
700 int flags
= step_data
->__flags
;
701 size_t cnt
= state
->__count
& 7;
703 while (*inptrp
< inend
&& cnt
< 4)
704 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
706 if (__builtin_expect (cnt
< 4, 0))
708 /* Still not enough bytes. Store the ones in the input buffer. */
709 state
->__count
&= ~7;
710 state
->__count
|= cnt
;
712 return __GCONV_INCOMPLETE_INPUT
;
715 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x80,
718 /* The value is too large. We don't try transliteration here since
719 this is not an error because of the lack of possibilities to
720 represent the result. This is a genuine bug in the input since
721 UCS4 does not allow such values. */
722 if (!(flags
& __GCONV_IGNORE_ERRORS
))
723 return __GCONV_ILLEGAL_INPUT
;
727 #if __BYTE_ORDER == __BIG_ENDIAN
728 (*outptrp
)[0] = state
->__value
.__wchb
[3];
729 (*outptrp
)[1] = state
->__value
.__wchb
[2];
730 (*outptrp
)[2] = state
->__value
.__wchb
[1];
731 (*outptrp
)[3] = state
->__value
.__wchb
[0];
732 #elif __BYTE_ORDER == __BIG_ENDIAN
733 (*outptrp
)[0] = state
->__value
.__wchb
[0];
734 (*outptrp
)[1] = state
->__value
.__wchb
[1];
735 (*outptrp
)[2] = state
->__value
.__wchb
[2];
736 (*outptrp
)[3] = state
->__value
.__wchb
[3];
742 /* Clear the state buffer. */
743 state
->__count
&= ~7;
748 #include <iconv/skeleton.c>
751 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
752 #define DEFINE_INIT 0
753 #define DEFINE_FINI 0
754 #define MIN_NEEDED_FROM 1
755 #define MIN_NEEDED_TO 4
756 #define FROM_DIRECTION 1
757 #define FROM_LOOP ascii_internal_loop
758 #define TO_LOOP ascii_internal_loop /* This is not used. */
759 #define FUNCTION_NAME __gconv_transform_ascii_internal
760 #define ONE_DIRECTION 1
762 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
763 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
764 #define LOOPFCT FROM_LOOP
767 if (__builtin_expect (*inptr > '\x7f', 0)) \
769 /* The value is too large. We don't try transliteration here since \
770 this is not an error because of the lack of possibilities to \
771 represent the result. This is a genuine bug in the input since \
772 ASCII does not allow such values. */ \
773 if (! ignore_errors_p ()) \
775 /* This is no correct ANSI_X3.4-1968 character. */ \
776 result = __GCONV_ILLEGAL_INPUT; \
784 /* It's a one-byte sequence. */ \
785 *((uint32_t *) outptr)++ = *inptr++; \
787 #define LOOP_NEED_FLAGS
788 #include <iconv/loop.c>
789 #include <iconv/skeleton.c>
792 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
793 #define DEFINE_INIT 0
794 #define DEFINE_FINI 0
795 #define MIN_NEEDED_FROM 4
796 #define MIN_NEEDED_TO 1
797 #define FROM_DIRECTION 1
798 #define FROM_LOOP internal_ascii_loop
799 #define TO_LOOP internal_ascii_loop /* This is not used. */
800 #define FUNCTION_NAME __gconv_transform_internal_ascii
801 #define ONE_DIRECTION 1
803 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
804 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
805 #define LOOPFCT FROM_LOOP
808 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
810 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
811 STANDARD_ERR_HANDLER (4); \
814 /* It's a one-byte sequence. */ \
815 *outptr++ = *((const uint32_t *) inptr)++; \
817 #define LOOP_NEED_FLAGS
818 #include <iconv/loop.c>
819 #include <iconv/skeleton.c>
822 /* Convert from the internal (UCS4-like) format to UTF-8. */
823 #define DEFINE_INIT 0
824 #define DEFINE_FINI 0
825 #define MIN_NEEDED_FROM 4
826 #define MIN_NEEDED_TO 1
827 #define MAX_NEEDED_TO 6
828 #define FROM_DIRECTION 1
829 #define FROM_LOOP internal_utf8_loop
830 #define TO_LOOP internal_utf8_loop /* This is not used. */
831 #define FUNCTION_NAME __gconv_transform_internal_utf8
832 #define ONE_DIRECTION 1
834 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
835 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
836 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
837 #define LOOPFCT FROM_LOOP
840 uint32_t wc = *((const uint32_t *) inptr); \
843 /* It's a one-byte sequence. */ \
844 *outptr++ = (unsigned char) wc; \
845 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
850 for (step = 2; step < 6; ++step) \
851 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
854 if (__builtin_expect (outptr + step > outend, 0)) \
857 result = __GCONV_FULL_OUTPUT; \
862 *outptr = (unsigned char) (~0xff >> step); \
867 start[step] = 0x80 | (wc & 0x3f); \
870 while (--step > 0); \
875 STANDARD_ERR_HANDLER (4); \
880 #define LOOP_NEED_FLAGS
881 #include <iconv/loop.c>
882 #include <iconv/skeleton.c>
885 /* Convert from UTF-8 to the internal (UCS4-like) format. */
886 #define DEFINE_INIT 0
887 #define DEFINE_FINI 0
888 #define MIN_NEEDED_FROM 1
889 #define MAX_NEEDED_FROM 6
890 #define MIN_NEEDED_TO 4
891 #define FROM_DIRECTION 1
892 #define FROM_LOOP utf8_internal_loop
893 #define TO_LOOP utf8_internal_loop /* This is not used. */
894 #define FUNCTION_NAME __gconv_transform_utf8_internal
895 #define ONE_DIRECTION 1
897 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
898 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
899 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
900 #define LOOPFCT FROM_LOOP
907 /* Next input byte. */ \
912 /* One byte sequence. */ \
918 if (ch >= 0xc2 && ch < 0xe0) \
920 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
921 otherwise the wide character could have been represented \
922 using a single byte. */ \
926 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
928 /* We expect three bytes. */ \
932 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
934 /* We expect four bytes. */ \
938 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
940 /* We expect five bytes. */ \
944 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
946 /* We expect six bytes. */ \
954 if (! ignore_errors_p ()) \
956 /* This is an illegal encoding. */ \
957 result = __GCONV_ILLEGAL_INPUT; \
961 /* Search the end of this ill-formed UTF-8 character. This \
962 is the next byte with (x & 0xc0) != 0x80. */ \
969 while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
974 if (__builtin_expect (inptr + cnt > inend, 0)) \
976 /* We don't have enough input. But before we report that check \
977 that all the bytes are correct. */ \
978 for (i = 1; inptr + i < inend; ++i) \
979 if ((inptr[i] & 0xc0) != 0x80) \
982 if (__builtin_expect (inptr + i == inend, 1)) \
984 result = __GCONV_INCOMPLETE_INPUT; \
988 if (ignore_errors_p ()) \
996 result = __GCONV_ILLEGAL_INPUT; \
1000 /* Read the possible remaining bytes. */ \
1001 for (i = 1; i < cnt; ++i) \
1003 uint32_t byte = inptr[i]; \
1005 if ((byte & 0xc0) != 0x80) \
1006 /* This is an illegal encoding. */ \
1010 ch |= byte & 0x3f; \
1013 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1014 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1015 have been represented with fewer than cnt bytes. */ \
1016 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1018 /* This is an illegal encoding. */ \
1019 if (ignore_errors_p ()) \
1026 result = __GCONV_ILLEGAL_INPUT; \
1033 /* Now adjust the pointers and store the result. */ \
1034 *((uint32_t *) outptr)++ = ch; \
1036 #define LOOP_NEED_FLAGS
1038 #define STORE_REST \
1040 /* We store the remaining bytes while converting them into the UCS4 \
1041 format. We can assume that the first byte in the buffer is \
1042 correct and that it requires a larger number of bytes than there \
1043 are in the input buffer. */ \
1044 wint_t ch = **inptrp; \
1047 state->__count = inend - *inptrp; \
1049 if (ch >= 0xc2 && ch < 0xe0) \
1051 /* We expect two bytes. The first byte cannot be 0xc0 or \
1052 0xc1, otherwise the wide character could have been \
1053 represented using a single byte. */ \
1057 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1059 /* We expect three bytes. */ \
1063 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1065 /* We expect four bytes. */ \
1069 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1071 /* We expect five bytes. */ \
1077 /* We expect six bytes. */ \
1082 /* The first byte is already consumed. */ \
1084 while (++(*inptrp) < inend) \
1087 ch |= **inptrp & 0x3f; \
1091 /* Shift for the so far missing bytes. */ \
1094 /* Store the value. */ \
1095 state->__value.__wch = ch; \
1098 #define UNPACK_BYTES \
1100 wint_t wch = state->__value.__wch; \
1102 inlen = state->__count; \
1104 if (state->__value.__wch <= 0x7ff) \
1106 bytebuf[0] = 0xc0; \
1109 else if (__builtin_expect (state->__value.__wch <= 0xffff, 1)) \
1111 bytebuf[0] = 0xe0; \
1114 else if (__builtin_expect (state->__value.__wch < 0x1fffff, 1)) \
1116 bytebuf[0] = 0xf0; \
1119 else if (__builtin_expect (state->__value.__wch < 0x3ffffff, 1)) \
1121 bytebuf[0] = 0xf8; \
1126 bytebuf[0] = 0xfc; \
1132 if (--ntotal < inlen) \
1133 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1136 while (ntotal > 1); \
1138 bytebuf[0] |= wch; \
1141 #include <iconv/loop.c>
1142 #include <iconv/skeleton.c>
1145 /* Convert from UCS2 to the internal (UCS4-like) format. */
1146 #define DEFINE_INIT 0
1147 #define DEFINE_FINI 0
1148 #define MIN_NEEDED_FROM 2
1149 #define MIN_NEEDED_TO 4
1150 #define FROM_DIRECTION 1
1151 #define FROM_LOOP ucs2_internal_loop
1152 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1153 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1154 #define ONE_DIRECTION 1
1156 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1157 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1158 #define LOOPFCT FROM_LOOP
1161 uint16_t u1 = *((const uint16_t *) inptr); \
1163 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1165 /* Surrogate characters in UCS-2 input are not valid. Reject \
1166 them. (Catching this here is not security relevant.) */ \
1167 if (! ignore_errors_p ()) \
1169 result = __GCONV_ILLEGAL_INPUT; \
1177 *((uint32_t *) outptr)++ = u1; \
1180 #define LOOP_NEED_FLAGS
1181 #include <iconv/loop.c>
1182 #include <iconv/skeleton.c>
1185 /* Convert from the internal (UCS4-like) format to UCS2. */
1186 #define DEFINE_INIT 0
1187 #define DEFINE_FINI 0
1188 #define MIN_NEEDED_FROM 4
1189 #define MIN_NEEDED_TO 2
1190 #define FROM_DIRECTION 1
1191 #define FROM_LOOP internal_ucs2_loop
1192 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1193 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1194 #define ONE_DIRECTION 1
1196 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1197 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1198 #define LOOPFCT FROM_LOOP
1201 uint32_t val = *((const uint32_t *) inptr); \
1203 if (__builtin_expect (val >= 0x10000, 0)) \
1205 UNICODE_TAG_HANDLER (val, 4); \
1206 STANDARD_ERR_HANDLER (4); \
1208 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1210 /* Surrogate characters in UCS-4 input are not valid. \
1211 We must catch this, because the UCS-2 output might be \
1212 interpreted as UTF-16 by other programs. If we let \
1213 surrogates pass through, attackers could make a security \
1214 hole exploit by synthesizing any desired plane 1-16 \
1216 if (! ignore_errors_p ()) \
1218 result = __GCONV_ILLEGAL_INPUT; \
1227 *((uint16_t *) outptr)++ = val; \
1231 #define LOOP_NEED_FLAGS
1232 #include <iconv/loop.c>
1233 #include <iconv/skeleton.c>
1236 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1237 #define DEFINE_INIT 0
1238 #define DEFINE_FINI 0
1239 #define MIN_NEEDED_FROM 2
1240 #define MIN_NEEDED_TO 4
1241 #define FROM_DIRECTION 1
1242 #define FROM_LOOP ucs2reverse_internal_loop
1243 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1244 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1245 #define ONE_DIRECTION 1
1247 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1248 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1249 #define LOOPFCT FROM_LOOP
1252 uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
1254 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1256 /* Surrogate characters in UCS-2 input are not valid. Reject \
1257 them. (Catching this here is not security relevant.) */ \
1258 if (! ignore_errors_p ()) \
1260 result = __GCONV_ILLEGAL_INPUT; \
1268 *((uint32_t *) outptr)++ = u1; \
1271 #define LOOP_NEED_FLAGS
1272 #include <iconv/loop.c>
1273 #include <iconv/skeleton.c>
1276 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1277 #define DEFINE_INIT 0
1278 #define DEFINE_FINI 0
1279 #define MIN_NEEDED_FROM 4
1280 #define MIN_NEEDED_TO 2
1281 #define FROM_DIRECTION 1
1282 #define FROM_LOOP internal_ucs2reverse_loop
1283 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1284 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1285 #define ONE_DIRECTION 1
1287 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1288 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1289 #define LOOPFCT FROM_LOOP
1292 uint32_t val = *((const uint32_t *) inptr); \
1293 if (__builtin_expect (val >= 0x10000, 0)) \
1295 UNICODE_TAG_HANDLER (val, 4); \
1296 STANDARD_ERR_HANDLER (4); \
1298 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1300 /* Surrogate characters in UCS-4 input are not valid. \
1301 We must catch this, because the UCS-2 output might be \
1302 interpreted as UTF-16 by other programs. If we let \
1303 surrogates pass through, attackers could make a security \
1304 hole exploit by synthesizing any desired plane 1-16 \
1306 if (! ignore_errors_p ()) \
1308 result = __GCONV_ILLEGAL_INPUT; \
1317 *((uint16_t *) outptr)++ = bswap_16 (val); \
1321 #define LOOP_NEED_FLAGS
1322 #include <iconv/loop.c>
1323 #include <iconv/skeleton.c>