1 /* Simple transformations functions.
2 Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 #include <sys/param.h>
31 #include <gconv_int.h>
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
43 # define EILSEQ EINVAL
47 /* Specialized conversion function for a single byte to INTERNAL, recognizing
48 only ASCII characters. */
50 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
59 /* Transform from the internal, UCS4-like format, to UCS4. The
60 difference between the internal ucs4 format and the real UCS4
61 format is, if any, the endianness. Unicode/ISO 10646 says that
62 unless some higher protocol specifies it differently, the byte
63 order is big endian. */
66 #define MIN_NEEDED_FROM 4
67 #define MIN_NEEDED_TO 4
68 #define FROM_DIRECTION 1
69 #define FROM_LOOP internal_ucs4_loop
70 #define TO_LOOP internal_ucs4_loop /* This is not used. */
71 #define FUNCTION_NAME __gconv_transform_internal_ucs4
75 __attribute ((always_inline
))
76 internal_ucs4_loop (struct __gconv_step
*step
,
77 struct __gconv_step_data
*step_data
,
78 const unsigned char **inptrp
, const unsigned char *inend
,
79 unsigned char **outptrp
, unsigned char *outend
,
82 const unsigned char *inptr
= *inptrp
;
83 unsigned char *outptr
= *outptrp
;
84 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
87 #if __BYTE_ORDER == __LITTLE_ENDIAN
88 /* Sigh, we have to do some real work. */
91 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
92 *((uint32_t *) outptr
)++ = bswap_32 (*(const uint32_t *) inptr
);
96 #elif __BYTE_ORDER == __BIG_ENDIAN
97 /* Simply copy the data. */
98 *inptrp
= inptr
+ n_convert
* 4;
99 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
101 # error "This endianess is not supported."
104 /* Determine the status. */
105 if (*inptrp
== inend
)
106 result
= __GCONV_EMPTY_INPUT
;
107 else if (*outptrp
+ 4 > outend
)
108 result
= __GCONV_FULL_OUTPUT
;
110 result
= __GCONV_INCOMPLETE_INPUT
;
/* Byte-wise counterpart of internal_ucs4_loop, compiled only when
   _STRING_ARCH_unaligned is not defined.  It never accesses the buffers
   through a uint32_t pointer: on little-endian hosts the four bytes of
   every unit are written in reverse order, on big-endian hosts the data
   is copied with __mempcpy.  The number of units handled is the smaller
   of the input and output room divided by four.  The status reported is
   __GCONV_EMPTY_INPUT when *INPTRP reaches INEND, __GCONV_FULL_OUTPUT
   when fewer than four bytes of output room remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.
   NOTE(review): presumably selected by the including skeleton when the
   caller's buffers are not suitably aligned -- confirm there.  */
115 #ifndef _STRING_ARCH_unaligned
117 __attribute ((always_inline
))
118 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
119 struct __gconv_step_data
*step_data
,
120 const unsigned char **inptrp
,
121 const unsigned char *inend
,
122 unsigned char **outptrp
, unsigned char *outend
,
123 size_t *irreversible
)
125 const unsigned char *inptr
= *inptrp
;
126 unsigned char *outptr
= *outptrp
;
127 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
130 # if __BYTE_ORDER == __LITTLE_ENDIAN
131 /* Sigh, we have to do some real work. */
/* Both pointers move by one 4-byte unit per iteration. */
134 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
136 outptr
[0] = inptr
[3];
137 outptr
[1] = inptr
[2];
138 outptr
[2] = inptr
[1];
139 outptr
[3] = inptr
[0];
144 # elif __BYTE_ORDER == __BIG_ENDIAN
145 /* Simply copy the data. */
146 *inptrp
= inptr
+ n_convert
* 4;
147 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
149 # error "This endianess is not supported."
152 /* Determine the status. */
153 if (*inptrp
== inend
)
154 result
= __GCONV_EMPTY_INPUT
;
155 else if (*outptrp
+ 4 > outend
)
156 result
= __GCONV_FULL_OUTPUT
;
158 result
= __GCONV_INCOMPLETE_INPUT
;
166 __attribute ((always_inline
))
167 internal_ucs4_loop_single (struct __gconv_step
*step
,
168 struct __gconv_step_data
*step_data
,
169 const unsigned char **inptrp
,
170 const unsigned char *inend
,
171 unsigned char **outptrp
, unsigned char *outend
,
172 size_t *irreversible
)
174 mbstate_t *state
= step_data
->__statep
;
175 size_t cnt
= state
->__count
& 7;
177 while (*inptrp
< inend
&& cnt
< 4)
178 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
180 if (__builtin_expect (cnt
< 4, 0))
182 /* Still not enough bytes. Store the ones in the input buffer. */
183 state
->__count
&= ~7;
184 state
->__count
|= cnt
;
186 return __GCONV_INCOMPLETE_INPUT
;
189 #if __BYTE_ORDER == __LITTLE_ENDIAN
190 (*outptrp
)[0] = state
->__value
.__wchb
[3];
191 (*outptrp
)[1] = state
->__value
.__wchb
[2];
192 (*outptrp
)[2] = state
->__value
.__wchb
[1];
193 (*outptrp
)[3] = state
->__value
.__wchb
[0];
196 #elif __BYTE_ORDER == __BIG_ENDIAN
198 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
200 # error "This endianess is not supported."
203 /* Clear the state buffer. */
204 state
->__count
&= ~7;
209 #include <iconv/skeleton.c>
212 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
213 for the other direction we have to check for correct values here. */
214 #define DEFINE_INIT 0
215 #define DEFINE_FINI 0
216 #define MIN_NEEDED_FROM 4
217 #define MIN_NEEDED_TO 4
218 #define FROM_DIRECTION 1
219 #define FROM_LOOP ucs4_internal_loop
220 #define TO_LOOP ucs4_internal_loop /* This is not used. */
221 #define FUNCTION_NAME __gconv_transform_ucs4_internal
225 __attribute ((always_inline
))
226 ucs4_internal_loop (struct __gconv_step
*step
,
227 struct __gconv_step_data
*step_data
,
228 const unsigned char **inptrp
, const unsigned char *inend
,
229 unsigned char **outptrp
, unsigned char *outend
,
230 size_t *irreversible
)
232 int flags
= step_data
->__flags
;
233 const unsigned char *inptr
= *inptrp
;
234 unsigned char *outptr
= *outptrp
;
235 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
239 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
243 #if __BYTE_ORDER == __LITTLE_ENDIAN
244 inval
= bswap_32 (*(const uint32_t *) inptr
);
246 inval
= *(const uint32_t *) inptr
;
249 if (__builtin_expect (inval
> 0x7fffffff, 0))
251 /* The value is too large. We don't try transliteration here since
252 this is not an error because of the lack of possibilities to
253 represent the result. This is a genuine bug in the input since
254 UCS4 does not allow such values. */
255 if (irreversible
== NULL
)
256 /* We are transliterating, don't try to correct anything. */
257 return __GCONV_ILLEGAL_INPUT
;
259 if (flags
& __GCONV_IGNORE_ERRORS
)
261 /* Just ignore this character. */
268 return __GCONV_ILLEGAL_INPUT
;
271 *((uint32_t *) outptr
)++ = inval
;
277 /* Determine the status. */
278 if (*inptrp
== inend
)
279 result
= __GCONV_EMPTY_INPUT
;
280 else if (*outptrp
+ 4 > outend
)
281 result
= __GCONV_FULL_OUTPUT
;
283 result
= __GCONV_INCOMPLETE_INPUT
;
288 #ifndef _STRING_ARCH_unaligned
290 __attribute ((always_inline
))
291 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
292 struct __gconv_step_data
*step_data
,
293 const unsigned char **inptrp
,
294 const unsigned char *inend
,
295 unsigned char **outptrp
, unsigned char *outend
,
296 size_t *irreversible
)
298 int flags
= step_data
->__flags
;
299 const unsigned char *inptr
= *inptrp
;
300 unsigned char *outptr
= *outptrp
;
301 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
305 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
307 if (__builtin_expect (inptr
[0] > 0x80, 0))
309 /* The value is too large. We don't try transliteration here since
310 this is not an error because of the lack of possibilities to
311 represent the result. This is a genuine bug in the input since
312 UCS4 does not allow such values. */
313 if (irreversible
== NULL
)
314 /* We are transliterating, don't try to correct anything. */
315 return __GCONV_ILLEGAL_INPUT
;
317 if (flags
& __GCONV_IGNORE_ERRORS
)
319 /* Just ignore this character. */
326 return __GCONV_ILLEGAL_INPUT
;
329 # if __BYTE_ORDER == __LITTLE_ENDIAN
330 outptr
[3] = inptr
[0];
331 outptr
[2] = inptr
[1];
332 outptr
[1] = inptr
[2];
333 outptr
[0] = inptr
[3];
335 outptr
[0] = inptr
[0];
336 outptr
[1] = inptr
[1];
337 outptr
[2] = inptr
[2];
338 outptr
[3] = inptr
[3];
346 /* Determine the status. */
347 if (*inptrp
== inend
)
348 result
= __GCONV_EMPTY_INPUT
;
349 else if (*outptrp
+ 4 > outend
)
350 result
= __GCONV_FULL_OUTPUT
;
352 result
= __GCONV_INCOMPLETE_INPUT
;
360 __attribute ((always_inline
))
361 ucs4_internal_loop_single (struct __gconv_step
*step
,
362 struct __gconv_step_data
*step_data
,
363 const unsigned char **inptrp
,
364 const unsigned char *inend
,
365 unsigned char **outptrp
, unsigned char *outend
,
366 size_t *irreversible
)
368 mbstate_t *state
= step_data
->__statep
;
369 int flags
= step_data
->__flags
;
370 size_t cnt
= state
->__count
& 7;
372 while (*inptrp
< inend
&& cnt
< 4)
373 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
375 if (__builtin_expect (cnt
< 4, 0))
377 /* Still not enough bytes. Store the ones in the input buffer. */
378 state
->__count
&= ~7;
379 state
->__count
|= cnt
;
381 return __GCONV_INCOMPLETE_INPUT
;
384 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x80,
387 /* The value is too large. We don't try transliteration here since
388 this is not an error because of the lack of possibilities to
389 represent the result. This is a genuine bug in the input since
390 UCS4 does not allow such values. */
391 if (!(flags
& __GCONV_IGNORE_ERRORS
))
393 *inptrp
-= cnt
- (state
->__count
& 7);
394 return __GCONV_ILLEGAL_INPUT
;
399 #if __BYTE_ORDER == __LITTLE_ENDIAN
400 (*outptrp
)[0] = state
->__value
.__wchb
[3];
401 (*outptrp
)[1] = state
->__value
.__wchb
[2];
402 (*outptrp
)[2] = state
->__value
.__wchb
[1];
403 (*outptrp
)[3] = state
->__value
.__wchb
[0];
404 #elif __BYTE_ORDER == __BIG_ENDIAN
405 (*outptrp
)[0] = state
->__value
.__wchb
[0];
406 (*outptrp
)[1] = state
->__value
.__wchb
[1];
407 (*outptrp
)[2] = state
->__value
.__wchb
[2];
408 (*outptrp
)[3] = state
->__value
.__wchb
[3];
414 /* Clear the state buffer. */
415 state
->__count
&= ~7;
420 #include <iconv/skeleton.c>
423 /* Similarly for the little endian form. */
424 #define DEFINE_INIT 0
425 #define DEFINE_FINI 0
426 #define MIN_NEEDED_FROM 4
427 #define MIN_NEEDED_TO 4
428 #define FROM_DIRECTION 1
429 #define FROM_LOOP internal_ucs4le_loop
430 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
431 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
435 __attribute ((always_inline
))
436 internal_ucs4le_loop (struct __gconv_step
*step
,
437 struct __gconv_step_data
*step_data
,
438 const unsigned char **inptrp
, const unsigned char *inend
,
439 unsigned char **outptrp
, unsigned char *outend
,
440 size_t *irreversible
)
442 const unsigned char *inptr
= *inptrp
;
443 unsigned char *outptr
= *outptrp
;
444 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
447 #if __BYTE_ORDER == __BIG_ENDIAN
448 /* Sigh, we have to do some real work. */
451 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
452 *((uint32_t *) outptr
)++ = bswap_32 (*(const uint32_t *) inptr
);
456 #elif __BYTE_ORDER == __LITTLE_ENDIAN
457 /* Simply copy the data. */
458 *inptrp
= inptr
+ n_convert
* 4;
459 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
461 # error "This endianess is not supported."
464 /* Determine the status. */
465 if (*inptrp
== inend
)
466 result
= __GCONV_EMPTY_INPUT
;
467 else if (*outptrp
+ 4 > outend
)
468 result
= __GCONV_FULL_OUTPUT
;
470 result
= __GCONV_INCOMPLETE_INPUT
;
/* Byte-wise counterpart of internal_ucs4le_loop, compiled only when
   _STRING_ARCH_unaligned is not defined.  It never accesses the buffers
   through a uint32_t pointer: on big-endian hosts the four bytes of
   every unit are written in reverse order, on little-endian hosts the
   data is copied with __mempcpy.  The number of units handled is the
   smaller of the input and output room divided by four.  The status is
   __GCONV_EMPTY_INPUT when *INPTRP reaches INEND and
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain; in
   the remaining case the code asserts that the output buffer cannot
   take another unit and reports __GCONV_FULL_OUTPUT.
   NOTE(review): presumably selected by the including skeleton when the
   caller's buffers are not suitably aligned -- confirm there.  */
475 #ifndef _STRING_ARCH_unaligned
477 __attribute ((always_inline
))
478 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
479 struct __gconv_step_data
*step_data
,
480 const unsigned char **inptrp
,
481 const unsigned char *inend
,
482 unsigned char **outptrp
, unsigned char *outend
,
483 size_t *irreversible
)
485 const unsigned char *inptr
= *inptrp
;
486 unsigned char *outptr
= *outptrp
;
487 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
490 # if __BYTE_ORDER == __BIG_ENDIAN
491 /* Sigh, we have to do some real work. */
/* Both pointers move by one 4-byte unit per iteration. */
494 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
496 outptr
[0] = inptr
[3];
497 outptr
[1] = inptr
[2];
498 outptr
[2] = inptr
[1];
499 outptr
[3] = inptr
[0];
504 # elif __BYTE_ORDER == __LITTLE_ENDIAN
505 /* Simply copy the data. */
506 *inptrp
= inptr
+ n_convert
* 4;
507 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
509 # error "This endianess is not supported."
512 /* Determine the status. */
513 if (*inptrp
== inend
)
514 result
= __GCONV_EMPTY_INPUT
;
515 else if (*inptrp
+ 4 > inend
)
516 result
= __GCONV_INCOMPLETE_INPUT
;
/* Input remains, so the output side must be what stopped us. */
519 assert (*outptrp
+ 4 > outend
);
520 result
= __GCONV_FULL_OUTPUT
;
529 __attribute ((always_inline
))
530 internal_ucs4le_loop_single (struct __gconv_step
*step
,
531 struct __gconv_step_data
*step_data
,
532 const unsigned char **inptrp
,
533 const unsigned char *inend
,
534 unsigned char **outptrp
, unsigned char *outend
,
535 size_t *irreversible
)
537 mbstate_t *state
= step_data
->__statep
;
538 size_t cnt
= state
->__count
& 7;
540 while (*inptrp
< inend
&& cnt
< 4)
541 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
543 if (__builtin_expect (cnt
< 4, 0))
545 /* Still not enough bytes. Store the ones in the input buffer. */
546 state
->__count
&= ~7;
547 state
->__count
|= cnt
;
549 return __GCONV_INCOMPLETE_INPUT
;
552 #if __BYTE_ORDER == __BIG_ENDIAN
553 (*outptrp
)[0] = state
->__value
.__wchb
[3];
554 (*outptrp
)[1] = state
->__value
.__wchb
[2];
555 (*outptrp
)[2] = state
->__value
.__wchb
[1];
556 (*outptrp
)[3] = state
->__value
.__wchb
[0];
561 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
564 /* Clear the state buffer. */
565 state
->__count
&= ~7;
570 #include <iconv/skeleton.c>
573 /* And finally from UCS4-LE to the internal encoding. */
574 #define DEFINE_INIT 0
575 #define DEFINE_FINI 0
576 #define MIN_NEEDED_FROM 4
577 #define MIN_NEEDED_TO 4
578 #define FROM_DIRECTION 1
579 #define FROM_LOOP ucs4le_internal_loop
580 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
581 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
585 __attribute ((always_inline
))
586 ucs4le_internal_loop (struct __gconv_step
*step
,
587 struct __gconv_step_data
*step_data
,
588 const unsigned char **inptrp
, const unsigned char *inend
,
589 unsigned char **outptrp
, unsigned char *outend
,
590 size_t *irreversible
)
592 int flags
= step_data
->__flags
;
593 const unsigned char *inptr
= *inptrp
;
594 unsigned char *outptr
= *outptrp
;
595 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
599 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
603 #if __BYTE_ORDER == __BIG_ENDIAN
604 inval
= bswap_32 (*(const uint32_t *) inptr
);
606 inval
= *(const uint32_t *) inptr
;
609 if (__builtin_expect (inval
> 0x7fffffff, 0))
611 /* The value is too large. We don't try transliteration here since
612 this is not an error because of the lack of possibilities to
613 represent the result. This is a genuine bug in the input since
614 UCS4 does not allow such values. */
615 if (irreversible
== NULL
)
616 /* We are transliterating, don't try to correct anything. */
617 return __GCONV_ILLEGAL_INPUT
;
619 if (flags
& __GCONV_IGNORE_ERRORS
)
621 /* Just ignore this character. */
626 return __GCONV_ILLEGAL_INPUT
;
629 *((uint32_t *) outptr
)++ = inval
;
635 /* Determine the status. */
636 if (*inptrp
== inend
)
637 result
= __GCONV_EMPTY_INPUT
;
638 else if (*inptrp
+ 4 > inend
)
639 result
= __GCONV_INCOMPLETE_INPUT
;
642 assert (*outptrp
+ 4 > outend
);
643 result
= __GCONV_FULL_OUTPUT
;
649 #ifndef _STRING_ARCH_unaligned
651 __attribute ((always_inline
))
652 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
653 struct __gconv_step_data
*step_data
,
654 const unsigned char **inptrp
,
655 const unsigned char *inend
,
656 unsigned char **outptrp
, unsigned char *outend
,
657 size_t *irreversible
)
659 int flags
= step_data
->__flags
;
660 const unsigned char *inptr
= *inptrp
;
661 unsigned char *outptr
= *outptrp
;
662 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
666 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
668 if (__builtin_expect (inptr
[3] > 0x80, 0))
670 /* The value is too large. We don't try transliteration here since
671 this is not an error because of the lack of possibilities to
672 represent the result. This is a genuine bug in the input since
673 UCS4 does not allow such values. */
674 if (irreversible
== NULL
)
675 /* We are transliterating, don't try to correct anything. */
676 return __GCONV_ILLEGAL_INPUT
;
678 if (flags
& __GCONV_IGNORE_ERRORS
)
680 /* Just ignore this character. */
687 return __GCONV_ILLEGAL_INPUT
;
690 # if __BYTE_ORDER == __BIG_ENDIAN
691 outptr
[3] = inptr
[0];
692 outptr
[2] = inptr
[1];
693 outptr
[1] = inptr
[2];
694 outptr
[0] = inptr
[3];
696 outptr
[0] = inptr
[0];
697 outptr
[1] = inptr
[1];
698 outptr
[2] = inptr
[2];
699 outptr
[3] = inptr
[3];
708 /* Determine the status. */
709 if (*inptrp
== inend
)
710 result
= __GCONV_EMPTY_INPUT
;
711 else if (*inptrp
+ 4 > inend
)
712 result
= __GCONV_INCOMPLETE_INPUT
;
715 assert (*outptrp
+ 4 > outend
);
716 result
= __GCONV_FULL_OUTPUT
;
725 __attribute ((always_inline
))
726 ucs4le_internal_loop_single (struct __gconv_step
*step
,
727 struct __gconv_step_data
*step_data
,
728 const unsigned char **inptrp
,
729 const unsigned char *inend
,
730 unsigned char **outptrp
, unsigned char *outend
,
731 size_t *irreversible
)
733 mbstate_t *state
= step_data
->__statep
;
734 int flags
= step_data
->__flags
;
735 size_t cnt
= state
->__count
& 7;
737 while (*inptrp
< inend
&& cnt
< 4)
738 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
740 if (__builtin_expect (cnt
< 4, 0))
742 /* Still not enough bytes. Store the ones in the input buffer. */
743 state
->__count
&= ~7;
744 state
->__count
|= cnt
;
746 return __GCONV_INCOMPLETE_INPUT
;
749 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x80,
752 /* The value is too large. We don't try transliteration here since
753 this is not an error because of the lack of possibilities to
754 represent the result. This is a genuine bug in the input since
755 UCS4 does not allow such values. */
756 if (!(flags
& __GCONV_IGNORE_ERRORS
))
757 return __GCONV_ILLEGAL_INPUT
;
761 #if __BYTE_ORDER == __BIG_ENDIAN
762 (*outptrp
)[0] = state
->__value
.__wchb
[3];
763 (*outptrp
)[1] = state
->__value
.__wchb
[2];
764 (*outptrp
)[2] = state
->__value
.__wchb
[1];
765 (*outptrp
)[3] = state
->__value
.__wchb
[0];
767 (*outptrp
)[0] = state
->__value
.__wchb
[0];
768 (*outptrp
)[1] = state
->__value
.__wchb
[1];
769 (*outptrp
)[2] = state
->__value
.__wchb
[2];
770 (*outptrp
)[3] = state
->__value
.__wchb
[3];
776 /* Clear the state buffer. */
777 state
->__count
&= ~7;
782 #include <iconv/skeleton.c>
785 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
/* One input byte becomes one 4-byte output unit (MIN_NEEDED_FROM 1,
   MIN_NEEDED_TO 4).  The loop body hands any byte above '\x7f' to
   STANDARD_FROM_LOOP_ERR_HANDLER and otherwise stores the byte widened
   to a uint32_t, advancing both pointers.
   NOTE(review): the store increments through a pointer cast
   (cast-as-lvalue), a GCC extension that current compilers reject;
   it should become a plain store followed by "outptr += 4".  */
786 #define DEFINE_INIT 0
787 #define DEFINE_FINI 0
788 #define MIN_NEEDED_FROM 1
789 #define MIN_NEEDED_TO 4
790 #define FROM_DIRECTION 1
791 #define FROM_LOOP ascii_internal_loop
792 #define TO_LOOP ascii_internal_loop /* This is not used. */
793 #define FUNCTION_NAME __gconv_transform_ascii_internal
794 #define ONE_DIRECTION 1
796 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
797 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
798 #define LOOPFCT FROM_LOOP
801 if (__builtin_expect (*inptr > '\x7f', 0)) \
803 /* The value is too large. We don't try transliteration here since \
804 this is not an error because of the lack of possibilities to \
805 represent the result. This is a genuine bug in the input since \
806 ASCII does not allow such values. */ \
807 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
810 /* It's a one-byte sequence. */ \
811 *((uint32_t *) outptr)++ = *inptr++; \
813 #define LOOP_NEED_FLAGS
814 #include <iconv/loop.c>
815 #include <iconv/skeleton.c>
818 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
/* One 4-byte input unit becomes one output byte (MIN_NEEDED_FROM 4,
   MIN_NEEDED_TO 1).  A value above 0x7f is passed to
   UNICODE_TAG_HANDLER and then to STANDARD_TO_LOOP_ERR_HANDLER; any
   other value is stored as a single byte.
   NOTE(review): the input pointer is advanced by incrementing through
   a pointer cast (cast-as-lvalue), a GCC extension that current
   compilers reject; it should become a plain load followed by
   "inptr += 4".  */
819 #define DEFINE_INIT 0
820 #define DEFINE_FINI 0
821 #define MIN_NEEDED_FROM 4
822 #define MIN_NEEDED_TO 1
823 #define FROM_DIRECTION 1
824 #define FROM_LOOP internal_ascii_loop
825 #define TO_LOOP internal_ascii_loop /* This is not used. */
826 #define FUNCTION_NAME __gconv_transform_internal_ascii
827 #define ONE_DIRECTION 1
829 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
830 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
831 #define LOOPFCT FROM_LOOP
834 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
836 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
837 STANDARD_TO_LOOP_ERR_HANDLER (4); \
840 /* It's a one-byte sequence. */ \
841 *outptr++ = *((const uint32_t *) inptr)++; \
843 #define LOOP_NEED_FLAGS
844 #include <iconv/loop.c>
845 #include <iconv/skeleton.c>
848 /* Convert from the internal (UCS4-like) format to UTF-8. */
/* One 4-byte input unit becomes between one and six output bytes
   (MIN_NEEDED_TO 1, MAX_NEEDED_TO 6).  The loop body reads the value
   WC, emits it directly when it is a one-byte sequence, and for larger
   values up to 0x7fffffff searches for the sequence length STEP by
   testing WC against a mask that grows by five bits per step.  It
   reports __GCONV_FULL_OUTPUT when STEP bytes do not fit, derives the
   lead byte from "~0xff >> step" and fills the trail bytes from the end
   with "0x80 | (wc & 0x3f)".  Values above 0x7fffffff go to
   STANDARD_TO_LOOP_ERR_HANDLER.  */
849 #define DEFINE_INIT 0
850 #define DEFINE_FINI 0
851 #define MIN_NEEDED_FROM 4
852 #define MIN_NEEDED_TO 1
853 #define MAX_NEEDED_TO 6
854 #define FROM_DIRECTION 1
855 #define FROM_LOOP internal_utf8_loop
856 #define TO_LOOP internal_utf8_loop /* This is not used. */
857 #define FUNCTION_NAME __gconv_transform_internal_utf8
858 #define ONE_DIRECTION 1
860 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
861 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
862 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
863 #define LOOPFCT FROM_LOOP
866 uint32_t wc = *((const uint32_t *) inptr); \
869 /* It's a one-byte sequence. */ \
870 *outptr++ = (unsigned char) wc; \
871 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
876 for (step = 2; step < 6; ++step) \
877 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
880 if (__builtin_expect (outptr + step > outend, 0)) \
883 result = __GCONV_FULL_OUTPUT; \
888 *outptr = (unsigned char) (~0xff >> step); \
893 start[step] = 0x80 | (wc & 0x3f); \
896 while (--step > 0); \
901 STANDARD_TO_LOOP_ERR_HANDLER (4); \
906 #define LOOP_NEED_FLAGS
907 #include <iconv/loop.c>
908 #include <iconv/skeleton.c>
911 /* Convert from UTF-8 to the internal (UCS4-like) format. */
/* Between one and six input bytes become one 4-byte output unit
   (MIN_NEEDED_FROM 1, MAX_NEEDED_FROM 6).  The loop body classifies the
   lead byte to find the expected sequence length, rejecting 0xc0 and
   0xc1 as two-byte leads.  For an unrecognised lead byte it skips the
   following trail bytes and calls STANDARD_FROM_LOOP_ERR_HANDLER.  When
   the sequence runs past INEND it reports __GCONV_INCOMPLETE_INPUT only
   if every byte present is a valid trail byte.  After reading the trail
   bytes it rejects sequences with a bad trail byte and encodings longer
   than necessary, then stores the value.
   STORE_REST saves a partial sequence in the conversion state: the
   number of bytes seen goes into the low bits of __count, the expected
   total is OR-ed in shifted left by eight, and the partial value goes
   into __value.__wch.  UNPACK_BYTES rebuilds the byte sequence from
   that state.
   NOTE(review): the final store increments through a pointer cast
   (cast-as-lvalue), which current compilers reject.  */
912 #define DEFINE_INIT 0
913 #define DEFINE_FINI 0
914 #define MIN_NEEDED_FROM 1
915 #define MAX_NEEDED_FROM 6
916 #define MIN_NEEDED_TO 4
917 #define FROM_DIRECTION 1
918 #define FROM_LOOP utf8_internal_loop
919 #define TO_LOOP utf8_internal_loop /* This is not used. */
920 #define FUNCTION_NAME __gconv_transform_utf8_internal
921 #define ONE_DIRECTION 1
923 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
924 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
925 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
926 #define LOOPFCT FROM_LOOP
933 /* Next input byte. */ \
938 /* One byte sequence. */ \
944 if (ch >= 0xc2 && ch < 0xe0) \
946 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
947 otherwise the wide character could have been represented \
948 using a single byte. */ \
952 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
954 /* We expect three bytes. */ \
958 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
960 /* We expect four bytes. */ \
964 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
966 /* We expect five bytes. */ \
970 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
972 /* We expect six bytes. */ \
980 /* Search the end of this ill-formed UTF-8 character. This \
981 is the next byte with (x & 0xc0) != 0x80. */ \
985 while (inptr + skipped < inend \
986 && (*(inptr + skipped) & 0xc0) == 0x80 \
989 STANDARD_FROM_LOOP_ERR_HANDLER (skipped); \
992 if (__builtin_expect (inptr + cnt > inend, 0)) \
994 /* We don't have enough input. But before we report that check \
995 that all the bytes are correct. */ \
996 for (i = 1; inptr + i < inend; ++i) \
997 if ((inptr[i] & 0xc0) != 0x80) \
1000 if (__builtin_expect (inptr + i == inend, 1)) \
1002 result = __GCONV_INCOMPLETE_INPUT; \
1006 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1009 /* Read the possible remaining bytes. */ \
1010 for (i = 1; i < cnt; ++i) \
1012 uint32_t byte = inptr[i]; \
1014 if ((byte & 0xc0) != 0x80) \
1015 /* This is an illegal encoding. */ \
1019 ch |= byte & 0x3f; \
1022 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1023 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1024 have been represented with fewer than cnt bytes. */ \
1025 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1027 /* This is an illegal encoding. */ \
1028 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1034 /* Now adjust the pointers and store the result. */ \
1035 *((uint32_t *) outptr)++ = ch; \
1037 #define LOOP_NEED_FLAGS
1039 #define STORE_REST \
1041 /* We store the remaining bytes while converting them into the UCS4 \
1042 format. We can assume that the first byte in the buffer is \
1043 correct and that it requires a larger number of bytes than there \
1044 are in the input buffer. */ \
1045 wint_t ch = **inptrp; \
1048 state->__count = inend - *inptrp; \
1050 if (ch >= 0xc2 && ch < 0xe0) \
1052 /* We expect two bytes. The first byte cannot be 0xc0 or \
1053 0xc1, otherwise the wide character could have been \
1054 represented using a single byte. */ \
1058 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1060 /* We expect three bytes. */ \
1064 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1066 /* We expect four bytes. */ \
1070 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1072 /* We expect five bytes. */ \
1078 /* We expect six bytes. */ \
1083 /* The first byte is already consumed. */ \
1085 while (++(*inptrp) < inend) \
1088 ch |= **inptrp & 0x3f; \
1092 /* Shift for the so far missing bytes. */ \
1095 /* Store the number of bytes expected for the entire sequence. */ \
1096 state->__count |= cnt << 8; \
1098 /* Store the value. */ \
1099 state->__value.__wch = ch; \
1102 #define UNPACK_BYTES \
1104 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1105 wint_t wch = state->__value.__wch; \
1106 size_t ntotal = state->__count >> 8; \
1108 inlen = state->__count & 255; \
1110 bytebuf[0] = inmask[ntotal - 2]; \
1114 if (--ntotal < inlen) \
1115 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1118 while (ntotal > 1); \
1120 bytebuf[0] |= wch; \
1123 #define CLEAR_STATE \
1127 #include <iconv/loop.c>
1128 #include <iconv/skeleton.c>
1131 /* Convert from UCS2 to the internal (UCS4-like) format. */
/* One 2-byte input unit becomes one 4-byte output unit
   (MIN_NEEDED_FROM 2, MIN_NEEDED_TO 4).  The loop body reads the unit in
   host byte order, passes surrogate values (0xd800 up to but excluding
   0xe000) to STANDARD_FROM_LOOP_ERR_HANDLER and stores every other
   value widened to a uint32_t.
   NOTE(review): the store increments through a pointer cast
   (cast-as-lvalue), which current compilers reject.  */
1132 #define DEFINE_INIT 0
1133 #define DEFINE_FINI 0
1134 #define MIN_NEEDED_FROM 2
1135 #define MIN_NEEDED_TO 4
1136 #define FROM_DIRECTION 1
1137 #define FROM_LOOP ucs2_internal_loop
1138 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1139 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1140 #define ONE_DIRECTION 1
1142 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1143 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1144 #define LOOPFCT FROM_LOOP
1147 uint16_t u1 = *((const uint16_t *) inptr); \
1149 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1151 /* Surrogate characters in UCS-2 input are not valid. Reject \
1152 them. (Catching this here is not security relevant.) */ \
1153 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1156 *((uint32_t *) outptr)++ = u1; \
1159 #define LOOP_NEED_FLAGS
1160 #include <iconv/loop.c>
1161 #include <iconv/skeleton.c>
1164 /* Convert from the internal (UCS4-like) format to UCS2. */
/* One 4-byte input unit becomes one 2-byte output unit
   (MIN_NEEDED_FROM 4, MIN_NEEDED_TO 2).  A value of 0x10000 or more is
   passed to UNICODE_TAG_HANDLER and then STANDARD_TO_LOOP_ERR_HANDLER.
   A surrogate value (0xd800 up to but excluding 0xe000) is treated as
   illegal input.  Everything else is stored as a uint16_t in host byte
   order.
   NOTE(review): for surrogates RESULT is set to __GCONV_ILLEGAL_INPUT
   before ignore_errors_p () is consulted, whereas the byte-swapped
   variant of this conversion sets it only inside the
   "! ignore_errors_p ()" branch -- confirm that a stale error status
   cannot escape when errors are being ignored.
   NOTE(review): the store increments through a pointer cast
   (cast-as-lvalue), which current compilers reject.  */
1165 #define DEFINE_INIT 0
1166 #define DEFINE_FINI 0
1167 #define MIN_NEEDED_FROM 4
1168 #define MIN_NEEDED_TO 2
1169 #define FROM_DIRECTION 1
1170 #define FROM_LOOP internal_ucs2_loop
1171 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1172 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1173 #define ONE_DIRECTION 1
1175 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1176 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1177 #define LOOPFCT FROM_LOOP
1180 uint32_t val = *((const uint32_t *) inptr); \
1182 if (__builtin_expect (val >= 0x10000, 0)) \
1184 UNICODE_TAG_HANDLER (val, 4); \
1185 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1187 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1189 /* Surrogate characters in UCS-4 input are not valid. \
1190 We must catch this, because the UCS-2 output might be \
1191 interpreted as UTF-16 by other programs. If we let \
1192 surrogates pass through, attackers could make a security \
1193 hole exploit by synthesizing any desired plane 1-16 \
1195 result = __GCONV_ILLEGAL_INPUT; \
1196 if (! ignore_errors_p ()) \
1204 *((uint16_t *) outptr)++ = val; \
1208 #define LOOP_NEED_FLAGS
1209 #include <iconv/loop.c>
1210 #include <iconv/skeleton.c>
1213 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
/* One 2-byte input unit becomes one 4-byte output unit
   (MIN_NEEDED_FROM 2, MIN_NEEDED_TO 4).  The loop body byte-swaps the
   unit with bswap_16 before examining it.  A surrogate value (0xd800 up
   to but excluding 0xe000) yields __GCONV_ILLEGAL_INPUT unless
   ignore_errors_p () holds; every other value is stored widened to a
   uint32_t.
   NOTE(review): the store increments through a pointer cast
   (cast-as-lvalue), which current compilers reject.  */
1214 #define DEFINE_INIT 0
1215 #define DEFINE_FINI 0
1216 #define MIN_NEEDED_FROM 2
1217 #define MIN_NEEDED_TO 4
1218 #define FROM_DIRECTION 1
1219 #define FROM_LOOP ucs2reverse_internal_loop
1220 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1221 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1222 #define ONE_DIRECTION 1
1224 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1225 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1226 #define LOOPFCT FROM_LOOP
1229 uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
1231 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1233 /* Surrogate characters in UCS-2 input are not valid. Reject \
1234 them. (Catching this here is not security relevant.) */ \
1235 if (! ignore_errors_p ()) \
1237 result = __GCONV_ILLEGAL_INPUT; \
1245 *((uint32_t *) outptr)++ = u1; \
1248 #define LOOP_NEED_FLAGS
1249 #include <iconv/loop.c>
1250 #include <iconv/skeleton.c>
1253 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
/* One 4-byte input unit becomes one 2-byte output unit
   (MIN_NEEDED_FROM 4, MIN_NEEDED_TO 2).  A value of 0x10000 or more is
   passed to UNICODE_TAG_HANDLER and then STANDARD_TO_LOOP_ERR_HANDLER.
   A surrogate value (0xd800 up to but excluding 0xe000) yields
   __GCONV_ILLEGAL_INPUT unless ignore_errors_p () holds.  Everything
   else is byte-swapped with bswap_16 and stored as a uint16_t.
   NOTE(review): the store increments through a pointer cast
   (cast-as-lvalue), which current compilers reject.  */
1254 #define DEFINE_INIT 0
1255 #define DEFINE_FINI 0
1256 #define MIN_NEEDED_FROM 4
1257 #define MIN_NEEDED_TO 2
1258 #define FROM_DIRECTION 1
1259 #define FROM_LOOP internal_ucs2reverse_loop
1260 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1261 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1262 #define ONE_DIRECTION 1
1264 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1265 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1266 #define LOOPFCT FROM_LOOP
1269 uint32_t val = *((const uint32_t *) inptr); \
1270 if (__builtin_expect (val >= 0x10000, 0)) \
1272 UNICODE_TAG_HANDLER (val, 4); \
1273 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1275 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1277 /* Surrogate characters in UCS-4 input are not valid. \
1278 We must catch this, because the UCS-2 output might be \
1279 interpreted as UTF-16 by other programs. If we let \
1280 surrogates pass through, attackers could make a security \
1281 hole exploit by synthesizing any desired plane 1-16 \
1283 if (! ignore_errors_p ()) \
1285 result = __GCONV_ILLEGAL_INPUT; \
1294 *((uint16_t *) outptr)++ = bswap_16 (val); \
1298 #define LOOP_NEED_FLAGS
1299 #include <iconv/loop.c>
1300 #include <iconv/skeleton.c>