1 /* Simple transformations functions.
2 Copyright (C) 1997-2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
29 #include <sys/param.h>
30 #include <gconv_int.h>
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 const unsigned char **, const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
42 # define EILSEQ EINVAL
46 /* Specialized conversion function for a single byte to INTERNAL, recognizing
47 only ASCII characters. */
49 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
58 /* Transform from the internal, UCS4-like format, to UCS4. The
59 only difference between the internal format and the real UCS4
60 format is, possibly, the endianness. Unicode/ISO 10646 says that
61 unless some higher protocol specifies it differently, the byte
62 order is big endian. */
65 #define MIN_NEEDED_FROM 4
66 #define MIN_NEEDED_TO 4
67 #define FROM_DIRECTION 1
68 #define FROM_LOOP internal_ucs4_loop
69 #define TO_LOOP internal_ucs4_loop /* This is not used. */
70 #define FUNCTION_NAME __gconv_transform_internal_ucs4
74 __attribute ((always_inline
))
75 internal_ucs4_loop (struct __gconv_step
*step
,
76 struct __gconv_step_data
*step_data
,
77 const unsigned char **inptrp
, const unsigned char *inend
,
78 unsigned char **outptrp
, unsigned char *outend
,
81 const unsigned char *inptr
= *inptrp
;
82 unsigned char *outptr
= *outptrp
;
83 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
86 #if __BYTE_ORDER == __LITTLE_ENDIAN
87 /* Sigh, we have to do some real work. */
89 uint32_t *outptr32
= (uint32_t *) outptr
;
91 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
92 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
95 *outptrp
= (unsigned char *) outptr32
;
96 #elif __BYTE_ORDER == __BIG_ENDIAN
97 /* Simply copy the data. */
98 *inptrp
= inptr
+ n_convert
* 4;
99 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
101 # error "This endianess is not supported."
104 /* Determine the status. */
105 if (*inptrp
== inend
)
106 result
= __GCONV_EMPTY_INPUT
;
107 else if (*outptrp
+ 4 > outend
)
108 result
= __GCONV_FULL_OUTPUT
;
110 result
= __GCONV_INCOMPLETE_INPUT
;
115 #ifndef _STRING_ARCH_unaligned
117 __attribute ((always_inline
))
118 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
119 struct __gconv_step_data
*step_data
,
120 const unsigned char **inptrp
,
121 const unsigned char *inend
,
122 unsigned char **outptrp
, unsigned char *outend
,
123 size_t *irreversible
)
125 const unsigned char *inptr
= *inptrp
;
126 unsigned char *outptr
= *outptrp
;
127 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
130 # if __BYTE_ORDER == __LITTLE_ENDIAN
131 /* Sigh, we have to do some real work. */
134 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
136 outptr
[0] = inptr
[3];
137 outptr
[1] = inptr
[2];
138 outptr
[2] = inptr
[1];
139 outptr
[3] = inptr
[0];
144 # elif __BYTE_ORDER == __BIG_ENDIAN
145 /* Simply copy the data. */
146 *inptrp
= inptr
+ n_convert
* 4;
147 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
149 # error "This endianess is not supported."
152 /* Determine the status. */
153 if (*inptrp
== inend
)
154 result
= __GCONV_EMPTY_INPUT
;
155 else if (*outptrp
+ 4 > outend
)
156 result
= __GCONV_FULL_OUTPUT
;
158 result
= __GCONV_INCOMPLETE_INPUT
;
166 __attribute ((always_inline
))
167 internal_ucs4_loop_single (struct __gconv_step
*step
,
168 struct __gconv_step_data
*step_data
,
169 const unsigned char **inptrp
,
170 const unsigned char *inend
,
171 unsigned char **outptrp
, unsigned char *outend
,
172 size_t *irreversible
)
174 mbstate_t *state
= step_data
->__statep
;
175 size_t cnt
= state
->__count
& 7;
177 while (*inptrp
< inend
&& cnt
< 4)
178 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
180 if (__builtin_expect (cnt
< 4, 0))
182 /* Still not enough bytes. Store the ones in the input buffer. */
183 state
->__count
&= ~7;
184 state
->__count
|= cnt
;
186 return __GCONV_INCOMPLETE_INPUT
;
189 #if __BYTE_ORDER == __LITTLE_ENDIAN
190 (*outptrp
)[0] = state
->__value
.__wchb
[3];
191 (*outptrp
)[1] = state
->__value
.__wchb
[2];
192 (*outptrp
)[2] = state
->__value
.__wchb
[1];
193 (*outptrp
)[3] = state
->__value
.__wchb
[0];
195 #elif __BYTE_ORDER == __BIG_ENDIAN
197 (*outptrp
)[0] = state
->__value
.__wchb
[0];
198 (*outptrp
)[1] = state
->__value
.__wchb
[1];
199 (*outptrp
)[2] = state
->__value
.__wchb
[2];
200 (*outptrp
)[3] = state
->__value
.__wchb
[3];
202 # error "This endianess is not supported."
206 /* Clear the state buffer. */
207 state
->__count
&= ~7;
212 #include <iconv/skeleton.c>
215 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
216 for the other direction we have to check for correct values here. */
217 #define DEFINE_INIT 0
218 #define DEFINE_FINI 0
219 #define MIN_NEEDED_FROM 4
220 #define MIN_NEEDED_TO 4
221 #define FROM_DIRECTION 1
222 #define FROM_LOOP ucs4_internal_loop
223 #define TO_LOOP ucs4_internal_loop /* This is not used. */
224 #define FUNCTION_NAME __gconv_transform_ucs4_internal
228 __attribute ((always_inline
))
229 ucs4_internal_loop (struct __gconv_step
*step
,
230 struct __gconv_step_data
*step_data
,
231 const unsigned char **inptrp
, const unsigned char *inend
,
232 unsigned char **outptrp
, unsigned char *outend
,
233 size_t *irreversible
)
235 int flags
= step_data
->__flags
;
236 const unsigned char *inptr
= *inptrp
;
237 unsigned char *outptr
= *outptrp
;
238 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
242 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
246 #if __BYTE_ORDER == __LITTLE_ENDIAN
247 inval
= bswap_32 (*(const uint32_t *) inptr
);
249 inval
= *(const uint32_t *) inptr
;
252 if (__builtin_expect (inval
> 0x7fffffff, 0))
254 /* The value is too large. We don't try transliteration here since
255 this is not an error because of the lack of possibilities to
256 represent the result. This is a genuine bug in the input since
257 UCS4 does not allow such values. */
258 if (irreversible
== NULL
)
259 /* We are transliterating, don't try to correct anything. */
260 return __GCONV_ILLEGAL_INPUT
;
262 if (flags
& __GCONV_IGNORE_ERRORS
)
264 /* Just ignore this character. */
271 return __GCONV_ILLEGAL_INPUT
;
274 *((uint32_t *) outptr
) = inval
;
275 outptr
+= sizeof (uint32_t);
281 /* Determine the status. */
282 if (*inptrp
== inend
)
283 result
= __GCONV_EMPTY_INPUT
;
284 else if (*outptrp
+ 4 > outend
)
285 result
= __GCONV_FULL_OUTPUT
;
287 result
= __GCONV_INCOMPLETE_INPUT
;
292 #ifndef _STRING_ARCH_unaligned
294 __attribute ((always_inline
))
295 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
296 struct __gconv_step_data
*step_data
,
297 const unsigned char **inptrp
,
298 const unsigned char *inend
,
299 unsigned char **outptrp
, unsigned char *outend
,
300 size_t *irreversible
)
302 int flags
= step_data
->__flags
;
303 const unsigned char *inptr
= *inptrp
;
304 unsigned char *outptr
= *outptrp
;
305 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
309 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
311 if (__builtin_expect (inptr
[0] > 0x80, 0))
313 /* The value is too large. We don't try transliteration here since
314 this is not an error because of the lack of possibilities to
315 represent the result. This is a genuine bug in the input since
316 UCS4 does not allow such values. */
317 if (irreversible
== NULL
)
318 /* We are transliterating, don't try to correct anything. */
319 return __GCONV_ILLEGAL_INPUT
;
321 if (flags
& __GCONV_IGNORE_ERRORS
)
323 /* Just ignore this character. */
330 return __GCONV_ILLEGAL_INPUT
;
333 # if __BYTE_ORDER == __LITTLE_ENDIAN
334 outptr
[3] = inptr
[0];
335 outptr
[2] = inptr
[1];
336 outptr
[1] = inptr
[2];
337 outptr
[0] = inptr
[3];
339 outptr
[0] = inptr
[0];
340 outptr
[1] = inptr
[1];
341 outptr
[2] = inptr
[2];
342 outptr
[3] = inptr
[3];
350 /* Determine the status. */
351 if (*inptrp
== inend
)
352 result
= __GCONV_EMPTY_INPUT
;
353 else if (*outptrp
+ 4 > outend
)
354 result
= __GCONV_FULL_OUTPUT
;
356 result
= __GCONV_INCOMPLETE_INPUT
;
364 __attribute ((always_inline
))
365 ucs4_internal_loop_single (struct __gconv_step
*step
,
366 struct __gconv_step_data
*step_data
,
367 const unsigned char **inptrp
,
368 const unsigned char *inend
,
369 unsigned char **outptrp
, unsigned char *outend
,
370 size_t *irreversible
)
372 mbstate_t *state
= step_data
->__statep
;
373 int flags
= step_data
->__flags
;
374 size_t cnt
= state
->__count
& 7;
376 while (*inptrp
< inend
&& cnt
< 4)
377 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
379 if (__builtin_expect (cnt
< 4, 0))
381 /* Still not enough bytes. Store the ones in the input buffer. */
382 state
->__count
&= ~7;
383 state
->__count
|= cnt
;
385 return __GCONV_INCOMPLETE_INPUT
;
388 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x80,
391 /* The value is too large. We don't try transliteration here since
392 this is not an error because of the lack of possibilities to
393 represent the result. This is a genuine bug in the input since
394 UCS4 does not allow such values. */
395 if (!(flags
& __GCONV_IGNORE_ERRORS
))
397 *inptrp
-= cnt
- (state
->__count
& 7);
398 return __GCONV_ILLEGAL_INPUT
;
403 #if __BYTE_ORDER == __LITTLE_ENDIAN
404 (*outptrp
)[0] = state
->__value
.__wchb
[3];
405 (*outptrp
)[1] = state
->__value
.__wchb
[2];
406 (*outptrp
)[2] = state
->__value
.__wchb
[1];
407 (*outptrp
)[3] = state
->__value
.__wchb
[0];
408 #elif __BYTE_ORDER == __BIG_ENDIAN
409 (*outptrp
)[0] = state
->__value
.__wchb
[0];
410 (*outptrp
)[1] = state
->__value
.__wchb
[1];
411 (*outptrp
)[2] = state
->__value
.__wchb
[2];
412 (*outptrp
)[3] = state
->__value
.__wchb
[3];
418 /* Clear the state buffer. */
419 state
->__count
&= ~7;
424 #include <iconv/skeleton.c>
427 /* Similarly for the little endian form. */
428 #define DEFINE_INIT 0
429 #define DEFINE_FINI 0
430 #define MIN_NEEDED_FROM 4
431 #define MIN_NEEDED_TO 4
432 #define FROM_DIRECTION 1
433 #define FROM_LOOP internal_ucs4le_loop
434 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
435 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
439 __attribute ((always_inline
))
440 internal_ucs4le_loop (struct __gconv_step
*step
,
441 struct __gconv_step_data
*step_data
,
442 const unsigned char **inptrp
, const unsigned char *inend
,
443 unsigned char **outptrp
, unsigned char *outend
,
444 size_t *irreversible
)
446 const unsigned char *inptr
= *inptrp
;
447 unsigned char *outptr
= *outptrp
;
448 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
451 #if __BYTE_ORDER == __BIG_ENDIAN
452 /* Sigh, we have to do some real work. */
454 uint32_t *outptr32
= (uint32_t *) outptr
;
456 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
457 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
458 outptr
= (unsigned char *) outptr32
;
462 #elif __BYTE_ORDER == __LITTLE_ENDIAN
463 /* Simply copy the data. */
464 *inptrp
= inptr
+ n_convert
* 4;
465 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
467 # error "This endianess is not supported."
470 /* Determine the status. */
471 if (*inptrp
== inend
)
472 result
= __GCONV_EMPTY_INPUT
;
473 else if (*outptrp
+ 4 > outend
)
474 result
= __GCONV_FULL_OUTPUT
;
476 result
= __GCONV_INCOMPLETE_INPUT
;
481 #ifndef _STRING_ARCH_unaligned
483 __attribute ((always_inline
))
484 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
485 struct __gconv_step_data
*step_data
,
486 const unsigned char **inptrp
,
487 const unsigned char *inend
,
488 unsigned char **outptrp
, unsigned char *outend
,
489 size_t *irreversible
)
491 const unsigned char *inptr
= *inptrp
;
492 unsigned char *outptr
= *outptrp
;
493 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
496 # if __BYTE_ORDER == __BIG_ENDIAN
497 /* Sigh, we have to do some real work. */
500 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
502 outptr
[0] = inptr
[3];
503 outptr
[1] = inptr
[2];
504 outptr
[2] = inptr
[1];
505 outptr
[3] = inptr
[0];
510 # elif __BYTE_ORDER == __LITTLE_ENDIAN
511 /* Simply copy the data. */
512 *inptrp
= inptr
+ n_convert
* 4;
513 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
515 # error "This endianess is not supported."
518 /* Determine the status. */
519 if (*inptrp
== inend
)
520 result
= __GCONV_EMPTY_INPUT
;
521 else if (*inptrp
+ 4 > inend
)
522 result
= __GCONV_INCOMPLETE_INPUT
;
525 assert (*outptrp
+ 4 > outend
);
526 result
= __GCONV_FULL_OUTPUT
;
535 __attribute ((always_inline
))
536 internal_ucs4le_loop_single (struct __gconv_step
*step
,
537 struct __gconv_step_data
*step_data
,
538 const unsigned char **inptrp
,
539 const unsigned char *inend
,
540 unsigned char **outptrp
, unsigned char *outend
,
541 size_t *irreversible
)
543 mbstate_t *state
= step_data
->__statep
;
544 size_t cnt
= state
->__count
& 7;
546 while (*inptrp
< inend
&& cnt
< 4)
547 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
549 if (__builtin_expect (cnt
< 4, 0))
551 /* Still not enough bytes. Store the ones in the input buffer. */
552 state
->__count
&= ~7;
553 state
->__count
|= cnt
;
555 return __GCONV_INCOMPLETE_INPUT
;
558 #if __BYTE_ORDER == __BIG_ENDIAN
559 (*outptrp
)[0] = state
->__value
.__wchb
[3];
560 (*outptrp
)[1] = state
->__value
.__wchb
[2];
561 (*outptrp
)[2] = state
->__value
.__wchb
[1];
562 (*outptrp
)[3] = state
->__value
.__wchb
[0];
566 (*outptrp
)[0] = state
->__value
.__wchb
[0];
567 (*outptrp
)[1] = state
->__value
.__wchb
[1];
568 (*outptrp
)[2] = state
->__value
.__wchb
[2];
569 (*outptrp
)[3] = state
->__value
.__wchb
[3];
575 /* Clear the state buffer. */
576 state
->__count
&= ~7;
581 #include <iconv/skeleton.c>
584 /* And finally from UCS4-LE to the internal encoding. */
585 #define DEFINE_INIT 0
586 #define DEFINE_FINI 0
587 #define MIN_NEEDED_FROM 4
588 #define MIN_NEEDED_TO 4
589 #define FROM_DIRECTION 1
590 #define FROM_LOOP ucs4le_internal_loop
591 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
592 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
596 __attribute ((always_inline
))
597 ucs4le_internal_loop (struct __gconv_step
*step
,
598 struct __gconv_step_data
*step_data
,
599 const unsigned char **inptrp
, const unsigned char *inend
,
600 unsigned char **outptrp
, unsigned char *outend
,
601 size_t *irreversible
)
603 int flags
= step_data
->__flags
;
604 const unsigned char *inptr
= *inptrp
;
605 unsigned char *outptr
= *outptrp
;
606 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
610 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
614 #if __BYTE_ORDER == __BIG_ENDIAN
615 inval
= bswap_32 (*(const uint32_t *) inptr
);
617 inval
= *(const uint32_t *) inptr
;
620 if (__builtin_expect (inval
> 0x7fffffff, 0))
622 /* The value is too large. We don't try transliteration here since
623 this is not an error because of the lack of possibilities to
624 represent the result. This is a genuine bug in the input since
625 UCS4 does not allow such values. */
626 if (irreversible
== NULL
)
627 /* We are transliterating, don't try to correct anything. */
628 return __GCONV_ILLEGAL_INPUT
;
630 if (flags
& __GCONV_IGNORE_ERRORS
)
632 /* Just ignore this character. */
637 return __GCONV_ILLEGAL_INPUT
;
640 *((uint32_t *) outptr
) = inval
;
641 outptr
+= sizeof (uint32_t);
647 /* Determine the status. */
648 if (*inptrp
== inend
)
649 result
= __GCONV_EMPTY_INPUT
;
650 else if (*inptrp
+ 4 > inend
)
651 result
= __GCONV_INCOMPLETE_INPUT
;
654 assert (*outptrp
+ 4 > outend
);
655 result
= __GCONV_FULL_OUTPUT
;
661 #ifndef _STRING_ARCH_unaligned
663 __attribute ((always_inline
))
664 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
665 struct __gconv_step_data
*step_data
,
666 const unsigned char **inptrp
,
667 const unsigned char *inend
,
668 unsigned char **outptrp
, unsigned char *outend
,
669 size_t *irreversible
)
671 int flags
= step_data
->__flags
;
672 const unsigned char *inptr
= *inptrp
;
673 unsigned char *outptr
= *outptrp
;
674 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
678 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
680 if (__builtin_expect (inptr
[3] > 0x80, 0))
682 /* The value is too large. We don't try transliteration here since
683 this is not an error because of the lack of possibilities to
684 represent the result. This is a genuine bug in the input since
685 UCS4 does not allow such values. */
686 if (irreversible
== NULL
)
687 /* We are transliterating, don't try to correct anything. */
688 return __GCONV_ILLEGAL_INPUT
;
690 if (flags
& __GCONV_IGNORE_ERRORS
)
692 /* Just ignore this character. */
699 return __GCONV_ILLEGAL_INPUT
;
702 # if __BYTE_ORDER == __BIG_ENDIAN
703 outptr
[3] = inptr
[0];
704 outptr
[2] = inptr
[1];
705 outptr
[1] = inptr
[2];
706 outptr
[0] = inptr
[3];
708 outptr
[0] = inptr
[0];
709 outptr
[1] = inptr
[1];
710 outptr
[2] = inptr
[2];
711 outptr
[3] = inptr
[3];
720 /* Determine the status. */
721 if (*inptrp
== inend
)
722 result
= __GCONV_EMPTY_INPUT
;
723 else if (*inptrp
+ 4 > inend
)
724 result
= __GCONV_INCOMPLETE_INPUT
;
727 assert (*outptrp
+ 4 > outend
);
728 result
= __GCONV_FULL_OUTPUT
;
737 __attribute ((always_inline
))
738 ucs4le_internal_loop_single (struct __gconv_step
*step
,
739 struct __gconv_step_data
*step_data
,
740 const unsigned char **inptrp
,
741 const unsigned char *inend
,
742 unsigned char **outptrp
, unsigned char *outend
,
743 size_t *irreversible
)
745 mbstate_t *state
= step_data
->__statep
;
746 int flags
= step_data
->__flags
;
747 size_t cnt
= state
->__count
& 7;
749 while (*inptrp
< inend
&& cnt
< 4)
750 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
752 if (__builtin_expect (cnt
< 4, 0))
754 /* Still not enough bytes. Store the ones in the input buffer. */
755 state
->__count
&= ~7;
756 state
->__count
|= cnt
;
758 return __GCONV_INCOMPLETE_INPUT
;
761 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x80,
764 /* The value is too large. We don't try transliteration here since
765 this is not an error because of the lack of possibilities to
766 represent the result. This is a genuine bug in the input since
767 UCS4 does not allow such values. */
768 if (!(flags
& __GCONV_IGNORE_ERRORS
))
769 return __GCONV_ILLEGAL_INPUT
;
773 #if __BYTE_ORDER == __BIG_ENDIAN
774 (*outptrp
)[0] = state
->__value
.__wchb
[3];
775 (*outptrp
)[1] = state
->__value
.__wchb
[2];
776 (*outptrp
)[2] = state
->__value
.__wchb
[1];
777 (*outptrp
)[3] = state
->__value
.__wchb
[0];
779 (*outptrp
)[0] = state
->__value
.__wchb
[0];
780 (*outptrp
)[1] = state
->__value
.__wchb
[1];
781 (*outptrp
)[2] = state
->__value
.__wchb
[2];
782 (*outptrp
)[3] = state
->__value
.__wchb
[3];
788 /* Clear the state buffer. */
789 state
->__count
&= ~7;
794 #include <iconv/skeleton.c>
797 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
798 #define DEFINE_INIT 0
799 #define DEFINE_FINI 0
800 #define MIN_NEEDED_FROM 1
801 #define MIN_NEEDED_TO 4
802 #define FROM_DIRECTION 1
803 #define FROM_LOOP ascii_internal_loop
804 #define TO_LOOP ascii_internal_loop /* This is not used. */
805 #define FUNCTION_NAME __gconv_transform_ascii_internal
806 #define ONE_DIRECTION 1
808 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
809 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
810 #define LOOPFCT FROM_LOOP
/* Per-character step of the ASCII -> internal loop: a byte above 0x7f \
   goes to the standard from-loop error handler; any other byte is \
   stored as one 32-bit unit and both pointers move on.  (The braces \
   and `else' joining the two halves are not visible in this view.)  */ \
813 if (__builtin_expect (*inptr > '\x7f', 0)) \
815 /* The value is too large. We don't try transliteration here since \
816 this is not an error because of the lack of possibilities to \
817 represent the result. This is a genuine bug in the input since \
818 ASCII does not allow such values. */ \
819 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
823 /* It's a one-byte sequence. */ \
824 *((uint32_t *) outptr) = *inptr++; \
825 outptr += sizeof (uint32_t); \
828 #define LOOP_NEED_FLAGS
829 #include <iconv/loop.c>
830 #include <iconv/skeleton.c>
833 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
834 #define DEFINE_INIT 0
835 #define DEFINE_FINI 0
836 #define MIN_NEEDED_FROM 4
837 #define MIN_NEEDED_TO 1
838 #define FROM_DIRECTION 1
839 #define FROM_LOOP internal_ascii_loop
840 #define TO_LOOP internal_ascii_loop /* This is not used. */
841 #define FUNCTION_NAME __gconv_transform_internal_ascii
842 #define ONE_DIRECTION 1
844 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
845 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
846 #define LOOPFCT FROM_LOOP
/* Per-character step of the internal -> ASCII loop: a 32-bit value \
   above 0x7f is passed to UNICODE_TAG_HANDLER and then to the standard \
   to-loop error handler; any other value is emitted as a single byte \
   and four input bytes are consumed.  */ \
849 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
851 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
852 STANDARD_TO_LOOP_ERR_HANDLER (4); \
856 /* It's a one-byte sequence. */ \
857 *outptr++ = *((const uint32_t *) inptr); \
858 inptr += sizeof (uint32_t); \
861 #define LOOP_NEED_FLAGS
862 #include <iconv/loop.c>
863 #include <iconv/skeleton.c>
866 /* Convert from the internal (UCS4-like) format to UTF-8. */
867 #define DEFINE_INIT 0
868 #define DEFINE_FINI 0
869 #define MIN_NEEDED_FROM 4
870 #define MIN_NEEDED_TO 1
871 #define MAX_NEEDED_TO 6
872 #define FROM_DIRECTION 1
873 #define FROM_LOOP internal_utf8_loop
874 #define TO_LOOP internal_utf8_loop /* This is not used. */
875 #define FUNCTION_NAME __gconv_transform_internal_utf8
876 #define ONE_DIRECTION 1
878 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
879 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
880 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
881 #define LOOPFCT FROM_LOOP
/* Per-character step of the internal -> UTF-8 loop.  Values below 0x80 \
   are emitted as one byte; values up to 0x7fffffff as a multi-byte \
   sequence; anything larger goes to the standard to-loop error handler. \
   NOTE(review): as far as this view shows, surrogates 0xd800..0xdfff \
   pass the `wc <= 0x7fffffff' test and would be encoded, while the \
   UCS2 converters in this file reject them -- confirm whether that is \
   intended.  */ \
884 uint32_t wc = *((const uint32_t *) inptr); \
886 if (__builtin_expect (wc < 0x80, 1)) \
887 /* It's a one-byte sequence. */ \
888 *outptr++ = (unsigned char) wc; \
889 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
892 unsigned char *start; \
/* For lengths 2..5, test whether wc has no bits set at or above bit \
   position 5 * step + 1.  */ \
894 for (step = 2; step < 6; ++step) \
895 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
/* Not enough room for a sequence of `step' bytes.  */ \
898 if (__builtin_expect (outptr + step > outend, 0)) \
901 result = __GCONV_FULL_OUTPUT; \
/* Lead byte marker: the top `step' bits of the byte are set.  */ \
906 *outptr = (unsigned char) (~0xff >> step); \
/* Continuation byte: 0x80 plus the low six bits of wc.  */ \
910 start[--step] = 0x80 | (wc & 0x3f); \
918 STANDARD_TO_LOOP_ERR_HANDLER (4); \
923 #define LOOP_NEED_FLAGS
924 #include <iconv/loop.c>
925 #include <iconv/skeleton.c>
928 /* Convert from UTF-8 to the internal (UCS4-like) format. */
929 #define DEFINE_INIT 0
930 #define DEFINE_FINI 0
931 #define MIN_NEEDED_FROM 1
932 #define MAX_NEEDED_FROM 6
933 #define MIN_NEEDED_TO 4
934 #define FROM_DIRECTION 1
935 #define FROM_LOOP utf8_internal_loop
936 #define TO_LOOP utf8_internal_loop /* This is not used. */
937 #define FUNCTION_NAME __gconv_transform_utf8_internal
938 #define ONE_DIRECTION 1
940 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
941 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
942 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
943 #define LOOPFCT FROM_LOOP
/* Per-character step of the UTF-8 -> internal loop.  The lead byte \
   selects the expected sequence length (1 to 6 bytes); trail bytes \
   must match 10xxxxxx.  Over-long forms and UTF-16 surrogates are \
   rejected, and a valid character is stored as one 32-bit unit.  */ \
946 /* Next input byte. */ \
947 uint32_t ch = *inptr; \
949 if (__builtin_expect (ch < 0x80, 1)) \
951 /* One byte sequence. */ \
959 if (ch >= 0xc2 && ch < 0xe0) \
961 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
962 otherwise the wide character could have been represented \
963 using a single byte. */ \
967 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
969 /* We expect three bytes. */ \
973 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
975 /* We expect four bytes. */ \
979 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
981 /* We expect five bytes. */ \
985 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
987 /* We expect six bytes. */ \
993 /* Search the end of this ill-formed UTF-8 character. This \
994 is the next byte with (x & 0xc0) != 0x80. */ \
998 while (inptr + i < inend \
999 && (*(inptr + i) & 0xc0) == 0x80 \
/* Hand the i bytes of the malformed sequence to the error handler.  */ \
1003 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1006 if (__builtin_expect (inptr + cnt > inend, 0)) \
1008 /* We don't have enough input. But before we report that check \
1009 that all the bytes are correct. */ \
1010 for (i = 1; inptr + i < inend; ++i) \
1011 if ((inptr[i] & 0xc0) != 0x80) \
/* Every available trail byte was well formed: the sequence is merely \
   truncated.  */ \
1014 if (__builtin_expect (inptr + i == inend, 1)) \
1016 result = __GCONV_INCOMPLETE_INPUT; \
1023 /* Read the possible remaining bytes. */ \
1024 for (i = 1; i < cnt; ++i) \
1026 uint32_t byte = inptr[i]; \
1028 if ((byte & 0xc0) != 0x80) \
1029 /* This is an illegal encoding. */ \
/* Merge the six payload bits of this trail byte into ch.  */ \
1033 ch |= byte & 0x3f; \
1036 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1037 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1038 have been represented with fewer than cnt bytes. */ \
1039 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1040 /* Do not accept UTF-16 surrogates. */ \
1041 || (ch >= 0xd800 && ch <= 0xdfff)) \
1043 /* This is an illegal encoding. */ \
1050 /* Now adjust the pointers and store the result. */ \
1051 *((uint32_t *) outptr) = ch; \
1052 outptr += sizeof (uint32_t); \
1054 #define LOOP_NEED_FLAGS
/* STORE_REST saves a truncated UTF-8 sequence in the conversion state.
   The partially decoded value goes to state->__value.__wch.
   state->__count holds the number of input bytes seen so far (set from
   inend - *inptrp), OR-ed with the expected total length shifted left by
   eight bits.  */
1056 #define STORE_REST \
1058 /* We store the remaining bytes while converting them into the UCS4 \
1059 format. We can assume that the first byte in the buffer is \
1060 correct and that it requires a larger number of bytes than there \
1061 are in the input buffer. */ \
1062 wint_t ch = **inptrp; \
1065 state->__count = inend - *inptrp; \
1067 assert (ch != 0xc0 && ch != 0xc1); \
1068 if (ch >= 0xc2 && ch < 0xe0) \
1070 /* We expect two bytes. The first byte cannot be 0xc0 or \
1071 0xc1, otherwise the wide character could have been \
1072 represented using a single byte. */ \
1076 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1078 /* We expect three bytes. */ \
1082 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1084 /* We expect four bytes. */ \
1088 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1090 /* We expect five bytes. */ \
1096 /* We expect six bytes. */ \
1101 /* The first byte is already consumed. */ \
1103 while (++(*inptrp) < inend) \
1106 ch |= **inptrp & 0x3f; \
1110 /* Shift for the so far missing bytes. */ \
1113 /* Store the number of bytes expected for the entire sequence. */ \
1114 state->__count |= cnt << 8; \
1116 /* Store the value. */ \
1117 state->__value.__wch = ch; \
/* UNPACK_BYTES rebuilds, in bytebuf, the UTF-8 bytes of the partial
   character held in the conversion state.  The expected total length is
   read from state->__count >> 8 and the number of bytes already seen
   from its low eight bits.  inmask supplies the lead-byte marker for
   sequences of two to six bytes.  */
1120 #define UNPACK_BYTES \
1122 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1123 wint_t wch = state->__value.__wch; \
1124 size_t ntotal = state->__count >> 8; \
1126 inlen = state->__count & 255; \
/* Lead-byte marker for a sequence of ntotal bytes.  */ \
1128 bytebuf[0] = inmask[ntotal - 2]; \
/* Only positions below inlen were actually seen and get a trail byte.  */ \
1132 if (--ntotal < inlen) \
1133 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1136 while (ntotal > 1); \
/* Whatever is left of wch forms the payload bits of the lead byte.  */ \
1138 bytebuf[0] |= wch; \
1141 #define CLEAR_STATE \
1145 #include <iconv/loop.c>
1146 #include <iconv/skeleton.c>
1149 /* Convert from UCS2 to the internal (UCS4-like) format. */
1150 #define DEFINE_INIT 0
1151 #define DEFINE_FINI 0
1152 #define MIN_NEEDED_FROM 2
1153 #define MIN_NEEDED_TO 4
1154 #define FROM_DIRECTION 1
1155 #define FROM_LOOP ucs2_internal_loop
1156 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1157 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1158 #define ONE_DIRECTION 1
1160 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1161 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1162 #define LOOPFCT FROM_LOOP
/* Per-character step of the UCS2 -> internal loop: read one 16-bit unit \
   with get16; surrogates (0xd800..0xdfff) go to the standard from-loop \
   error handler, anything else is stored as one 32-bit unit.  */ \
1165 uint16_t u1 = get16 (inptr); \
1167 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1169 /* Surrogate characters in UCS-2 input are not valid. Reject \
1170 them. (Catching this here is not security relevant.) */ \
1171 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1174 *((uint32_t *) outptr) = u1; \
1175 outptr += sizeof (uint32_t); \
1178 #define LOOP_NEED_FLAGS
1179 #include <iconv/loop.c>
1180 #include <iconv/skeleton.c>
1183 /* Convert from the internal (UCS4-like) format to UCS2. */
1184 #define DEFINE_INIT 0
1185 #define DEFINE_FINI 0
1186 #define MIN_NEEDED_FROM 4
1187 #define MIN_NEEDED_TO 2
1188 #define FROM_DIRECTION 1
1189 #define FROM_LOOP internal_ucs2_loop
1190 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1191 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1192 #define ONE_DIRECTION 1
1194 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1195 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1196 #define LOOPFCT FROM_LOOP
/* Per-character step of the internal -> UCS2 loop: values of 0x10000 \
   and above are passed to UNICODE_TAG_HANDLER and then to the standard \
   to-loop error handler; surrogates (0xd800..0xdfff) yield \
   __GCONV_ILLEGAL_INPUT subject to ignore_errors_p (); any other value \
   is written as one 16-bit unit with put16.  */ \
1199 uint32_t val = *((const uint32_t *) inptr); \
1201 if (__builtin_expect (val >= 0x10000, 0)) \
1203 UNICODE_TAG_HANDLER (val, 4); \
1204 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1206 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1208 /* Surrogate characters in UCS-4 input are not valid. \
1209 We must catch this, because the UCS-2 output might be \
1210 interpreted as UTF-16 by other programs. If we let \
1211 surrogates pass through, attackers could make a security \
1212 hole exploit by synthesizing any desired plane 1-16 \
1214 result = __GCONV_ILLEGAL_INPUT; \
1215 if (! ignore_errors_p ()) \
1223 put16 (outptr, val); \
1224 outptr += sizeof (uint16_t); \
1228 #define LOOP_NEED_FLAGS
1229 #include <iconv/loop.c>
1230 #include <iconv/skeleton.c>
1233 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1234 #define DEFINE_INIT 0
1235 #define DEFINE_FINI 0
1236 #define MIN_NEEDED_FROM 2
1237 #define MIN_NEEDED_TO 4
1238 #define FROM_DIRECTION 1
1239 #define FROM_LOOP ucs2reverse_internal_loop
1240 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1241 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1242 #define ONE_DIRECTION 1
1244 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1245 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1246 #define LOOPFCT FROM_LOOP
/* Per-character step of the byte-swapped UCS2 -> internal loop: read \
   one 16-bit unit with get16 and swap it with bswap_16; surrogates \
   (0xd800..0xdfff) yield __GCONV_ILLEGAL_INPUT subject to \
   ignore_errors_p (); anything else is stored as one 32-bit unit.  */ \
1249 uint16_t u1 = bswap_16 (get16 (inptr)); \
1251 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1253 /* Surrogate characters in UCS-2 input are not valid. Reject \
1254 them. (Catching this here is not security relevant.) */ \
1255 if (! ignore_errors_p ()) \
1257 result = __GCONV_ILLEGAL_INPUT; \
1265 *((uint32_t *) outptr) = u1; \
1266 outptr += sizeof (uint32_t); \
1269 #define LOOP_NEED_FLAGS
1270 #include <iconv/loop.c>
1271 #include <iconv/skeleton.c>
1274 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1275 #define DEFINE_INIT 0
1276 #define DEFINE_FINI 0
1277 #define MIN_NEEDED_FROM 4
1278 #define MIN_NEEDED_TO 2
1279 #define FROM_DIRECTION 1
1280 #define FROM_LOOP internal_ucs2reverse_loop
1281 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1282 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1283 #define ONE_DIRECTION 1
1285 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1286 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1287 #define LOOPFCT FROM_LOOP
/* Per-character step of the internal -> byte-swapped UCS2 loop: values \
   of 0x10000 and above are passed to UNICODE_TAG_HANDLER and then to \
   the standard to-loop error handler; surrogates (0xd800..0xdfff) yield \
   __GCONV_ILLEGAL_INPUT subject to ignore_errors_p (); any other value \
   is byte-swapped with bswap_16 and written with put16.  */ \
1290 uint32_t val = *((const uint32_t *) inptr); \
1291 if (__builtin_expect (val >= 0x10000, 0)) \
1293 UNICODE_TAG_HANDLER (val, 4); \
1294 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1296 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1298 /* Surrogate characters in UCS-4 input are not valid. \
1299 We must catch this, because the UCS-2 output might be \
1300 interpreted as UTF-16 by other programs. If we let \
1301 surrogates pass through, attackers could make a security \
1302 hole exploit by synthesizing any desired plane 1-16 \
1304 if (! ignore_errors_p ()) \
1306 result = __GCONV_ILLEGAL_INPUT; \
1315 put16 (outptr, bswap_16 (val)); \
1316 outptr += sizeof (uint16_t); \
1320 #define LOOP_NEED_FLAGS
1321 #include <iconv/loop.c>
1322 #include <iconv/skeleton.c>