1 /* Simple transformations functions.
2 Copyright (C) 1997-2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 #include <sys/param.h>
31 #include <gconv_int.h>
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
43 # define EILSEQ EINVAL
47 /* Specialized conversion function for a single byte to INTERNAL, recognizing
48 only ASCII characters. */
50 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
59 /* Transform from the internal, UCS4-like format, to UCS4. The
60 difference between the internal ucs4 format and the real UCS4
61 format is, if any, the endianness. Unicode/ISO 10646 says that
62 unless some higher protocol specifies it differently, the byte
63 order is big endian. */
66 #define MIN_NEEDED_FROM 4
67 #define MIN_NEEDED_TO 4
68 #define FROM_DIRECTION 1
69 #define FROM_LOOP internal_ucs4_loop
70 #define TO_LOOP internal_ucs4_loop /* This is not used. */
71 #define FUNCTION_NAME __gconv_transform_internal_ucs4
/* internal_ucs4_loop: convert whole 4-byte units from the internal
   representation to UCS4.  The number of units handled is
   MIN (inend - inptr, outend - outptr) / 4.  On little-endian hosts every
   32-bit word is passed through bswap_32; on big-endian hosts the bytes
   are copied unchanged with __mempcpy.  The status becomes
   __GCONV_EMPTY_INPUT when *inptrp has reached inend and
   __GCONV_FULL_OUTPUT when fewer than 4 bytes of output space remain.
   NOTE(review): the return type, braces and several statements of this
   function are absent from this listing; the final
   __GCONV_INCOMPLETE_INPUT assignment presumably belongs to an else
   branch -- confirm against the complete file.  */
75 __attribute ((always_inline
))
76 internal_ucs4_loop (struct __gconv_step
*step
,
77 struct __gconv_step_data
*step_data
,
78 const unsigned char **inptrp
, const unsigned char *inend
,
79 unsigned char **outptrp
, unsigned char *outend
,
82 const unsigned char *inptr
= *inptrp
;
83 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
84 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
87 #if __BYTE_ORDER == __LITTLE_ENDIAN
88 /* Sigh, we have to do some real work. */
90 uint32_t *outptr32
= (uint32_t *) outptr
;
/* The casts below read and write whole 32-bit words; presumably both
   buffers are suitably aligned on this path -- TODO confirm.  */
92 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
93 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
96 *outptrp
= (unsigned char *) outptr32
;
97 #elif __BYTE_ORDER == __BIG_ENDIAN
98 /* Simply copy the data. */
99 *inptrp
= inptr
+ n_convert
* 4;
100 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
102 # error "This endianess is not supported."
105 /* Determine the status. */
106 if (*inptrp
== inend
)
107 result
= __GCONV_EMPTY_INPUT
;
108 else if (*outptrp
+ 4 > outend
)
109 result
= __GCONV_FULL_OUTPUT
;
111 result
= __GCONV_INCOMPLETE_INPUT
;
116 #ifndef _STRING_ARCH_unaligned
/* internal_ucs4_loop_unaligned: variant of the internal -> UCS4 loop that
   only uses byte accesses.  It handles
   MIN (inend - inptr, outend - outptr) / 4 units.  On little-endian hosts
   the four bytes of every unit are written in reverse order; on
   big-endian hosts the data is copied with __mempcpy.  The status is
   __GCONV_EMPTY_INPUT when *inptrp equals inend and __GCONV_FULL_OUTPUT
   when fewer than 4 output bytes remain.
   NOTE(review): the return type, braces and the statements between the
   loop and the status check are absent from this listing; the final
   __GCONV_INCOMPLETE_INPUT assignment presumably belongs to an else
   branch -- confirm against the complete file.  */
118 __attribute ((always_inline
))
119 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
120 struct __gconv_step_data
*step_data
,
121 const unsigned char **inptrp
,
122 const unsigned char *inend
,
123 unsigned char **outptrp
, unsigned char *outend
,
124 size_t *irreversible
)
126 const unsigned char *inptr
= *inptrp
;
127 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
128 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
131 # if __BYTE_ORDER == __LITTLE_ENDIAN
132 /* Sigh, we have to do some real work. */
135 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
/* Byte-wise reversal of one unit.  */
137 outptr
[0] = inptr
[3];
138 outptr
[1] = inptr
[2];
139 outptr
[2] = inptr
[1];
140 outptr
[3] = inptr
[0];
145 # elif __BYTE_ORDER == __BIG_ENDIAN
146 /* Simply copy the data. */
147 *inptrp
= inptr
+ n_convert
* 4;
148 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
150 # error "This endianess is not supported."
153 /* Determine the status. */
154 if (*inptrp
== inend
)
155 result
= __GCONV_EMPTY_INPUT
;
156 else if (*outptrp
+ 4 > outend
)
157 result
= __GCONV_FULL_OUTPUT
;
159 result
= __GCONV_INCOMPLETE_INPUT
;
/* internal_ucs4_loop_single: complete one unit whose bytes are buffered in
   the conversion state.  Input bytes are appended to
   state->__value.__wchb, starting at index (state->__count & 7), until
   four bytes are present or the input is exhausted.  With fewer than four
   bytes the new count is stored in the low three bits of state->__count
   and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the four buffered
   bytes are written to *outptrp -- reversed on little-endian hosts, in
   order on big-endian hosts -- and the low three bits of state->__count
   are cleared.
   NOTE(review): the return type, braces, pointer updates and final return
   are absent from this listing.  */
167 __attribute ((always_inline
))
168 internal_ucs4_loop_single (struct __gconv_step
*step
,
169 struct __gconv_step_data
*step_data
,
170 const unsigned char **inptrp
,
171 const unsigned char *inend
,
172 unsigned char **outptrp
, unsigned char *outend
,
173 size_t *irreversible
)
175 mbstate_t *state
= step_data
->__statep
;
/* Number of bytes already buffered in the state.  */
176 size_t cnt
= state
->__count
& 7;
178 while (*inptrp
< inend
&& cnt
< 4)
179 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
181 if (__builtin_expect (cnt
< 4, 0))
183 /* Still not enough bytes. Store the ones in the input buffer. */
184 state
->__count
&= ~7;
185 state
->__count
|= cnt
;
187 return __GCONV_INCOMPLETE_INPUT
;
190 #if __BYTE_ORDER == __LITTLE_ENDIAN
191 (*outptrp
)[0] = state
->__value
.__wchb
[3];
192 (*outptrp
)[1] = state
->__value
.__wchb
[2];
193 (*outptrp
)[2] = state
->__value
.__wchb
[1];
194 (*outptrp
)[3] = state
->__value
.__wchb
[0];
196 #elif __BYTE_ORDER == __BIG_ENDIAN
198 (*outptrp
)[0] = state
->__value
.__wchb
[0];
199 (*outptrp
)[1] = state
->__value
.__wchb
[1];
200 (*outptrp
)[2] = state
->__value
.__wchb
[2];
201 (*outptrp
)[3] = state
->__value
.__wchb
[3];
203 # error "This endianess is not supported."
207 /* Clear the state buffer. */
208 state
->__count
&= ~7;
213 #include <iconv/skeleton.c>
216 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
217 for the other direction we have to check for correct values here. */
218 #define DEFINE_INIT 0
219 #define DEFINE_FINI 0
220 #define MIN_NEEDED_FROM 4
221 #define MIN_NEEDED_TO 4
222 #define FROM_DIRECTION 1
223 #define FROM_LOOP ucs4_internal_loop
224 #define TO_LOOP ucs4_internal_loop /* This is not used. */
225 #define FUNCTION_NAME __gconv_transform_ucs4_internal
/* ucs4_internal_loop: convert whole 4-byte UCS4 units to the internal
   representation, validating each value.  Every unit is loaded as a
   32-bit word (byte-swapped with bswap_32 on little-endian hosts) and
   rejected when it exceeds 0x7fffffff: __GCONV_ILLEGAL_INPUT is returned
   when irreversible is NULL, the __GCONV_IGNORE_ERRORS flag is consulted,
   and otherwise __GCONV_ILLEGAL_INPUT is returned.  Accepted values are
   stored as a uint32_t and outptr advances by sizeof (uint32_t).  The
   status is __GCONV_EMPTY_INPUT when *inptrp equals inend and
   __GCONV_FULL_OUTPUT when fewer than 4 output bytes remain.
   NOTE(review): the return type, braces, the body of the ignore-errors
   branch, the pointer write-backs and the #else/#endif lines are absent
   from this listing; the unswapped load and the final
   __GCONV_INCOMPLETE_INPUT assignment presumably belong to else
   branches -- confirm against the complete file.  */
229 __attribute ((always_inline
))
230 ucs4_internal_loop (struct __gconv_step
*step
,
231 struct __gconv_step_data
*step_data
,
232 const unsigned char **inptrp
, const unsigned char *inend
,
233 unsigned char **outptrp
, unsigned char *outend
,
234 size_t *irreversible
)
236 int flags
= step_data
->__flags
;
237 const unsigned char *inptr
= *inptrp
;
238 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
239 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
243 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
247 #if __BYTE_ORDER == __LITTLE_ENDIAN
248 inval
= bswap_32 (*(const uint32_t *) inptr
);
250 inval
= *(const uint32_t *) inptr
;
253 if (__builtin_expect (inval
> 0x7fffffff, 0))
255 /* The value is too large. We don't try transliteration here since
256 this is not an error because of the lack of possibilities to
257 represent the result. This is a genuine bug in the input since
258 UCS4 does not allow such values. */
259 if (irreversible
== NULL
)
260 /* We are transliterating, don't try to correct anything. */
261 return __GCONV_ILLEGAL_INPUT
;
263 if (flags
& __GCONV_IGNORE_ERRORS
)
265 /* Just ignore this character. */
272 return __GCONV_ILLEGAL_INPUT
;
275 *((uint32_t *) outptr
) = inval
;
276 outptr
+= sizeof (uint32_t);
282 /* Determine the status. */
283 if (*inptrp
== inend
)
284 result
= __GCONV_EMPTY_INPUT
;
285 else if (*outptrp
+ 4 > outend
)
286 result
= __GCONV_FULL_OUTPUT
;
288 result
= __GCONV_INCOMPLETE_INPUT
;
293 #ifndef _STRING_ARCH_unaligned
/* ucs4_internal_loop_unaligned: byte-access variant of the UCS4 ->
   internal loop.  It handles MIN (inend - inptr, outend - outptr) / 4
   units.  A unit is rejected when its first byte, the most significant
   one in big-endian UCS4, is above 0x7f, i.e. when the value exceeds
   0x7fffffff: __GCONV_ILLEGAL_INPUT is returned when irreversible is
   NULL, the __GCONV_IGNORE_ERRORS flag is consulted, and otherwise
   __GCONV_ILLEGAL_INPUT is returned.  Accepted units are copied byte by
   byte, reversed on little-endian hosts.  The status is
   __GCONV_EMPTY_INPUT when *inptrp equals inend and __GCONV_FULL_OUTPUT
   when fewer than 4 output bytes remain.
   NOTE(review): the return type, braces, the body of the ignore-errors
   branch, the pointer write-backs and the #else/#endif lines are absent
   from this listing -- confirm against the complete file.  */
295 __attribute ((always_inline
))
296 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
297 struct __gconv_step_data
*step_data
,
298 const unsigned char **inptrp
,
299 const unsigned char *inend
,
300 unsigned char **outptrp
, unsigned char *outend
,
301 size_t *irreversible
)
303 int flags
= step_data
->__flags
;
304 const unsigned char *inptr
= *inptrp
;
305 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
306 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
310 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
/* The threshold is 0x7f, not 0x80: a top byte of 0x80 already encodes a
   value above 0x7fffffff, which ucs4_internal_loop rejects through its
   `inval > 0x7fffffff' test.  */
312 if (__builtin_expect (inptr
[0] > 0x7f, 0))
314 /* The value is too large. We don't try transliteration here since
315 this is not an error because of the lack of possibilities to
316 represent the result. This is a genuine bug in the input since
317 UCS4 does not allow such values. */
318 if (irreversible
== NULL
)
319 /* We are transliterating, don't try to correct anything. */
320 return __GCONV_ILLEGAL_INPUT
;
322 if (flags
& __GCONV_IGNORE_ERRORS
)
324 /* Just ignore this character. */
331 return __GCONV_ILLEGAL_INPUT
;
334 # if __BYTE_ORDER == __LITTLE_ENDIAN
335 outptr
[3] = inptr
[0];
336 outptr
[2] = inptr
[1];
337 outptr
[1] = inptr
[2];
338 outptr
[0] = inptr
[3];
340 outptr
[0] = inptr
[0];
341 outptr
[1] = inptr
[1];
342 outptr
[2] = inptr
[2];
343 outptr
[3] = inptr
[3];
351 /* Determine the status. */
352 if (*inptrp
== inend
)
353 result
= __GCONV_EMPTY_INPUT
;
354 else if (*outptrp
+ 4 > outend
)
355 result
= __GCONV_FULL_OUTPUT
;
357 result
= __GCONV_INCOMPLETE_INPUT
;
/* ucs4_internal_loop_single: complete one UCS4 unit whose bytes are
   buffered in the conversion state.  Input bytes are appended to
   state->__value.__wchb, starting at index (state->__count & 7), until
   four bytes are present or the input is exhausted.  With fewer than four
   bytes the count is stored in the low three bits of state->__count and
   __GCONV_INCOMPLETE_INPUT is returned.  A unit whose first byte is above
   0x7f (value above 0x7fffffff) is invalid: unless __GCONV_IGNORE_ERRORS
   is set, *inptrp is moved back by the number of bytes consumed in this
   call and __GCONV_ILLEGAL_INPUT is returned.  Otherwise the bytes are
   written to *outptrp, reversed on little-endian hosts, and the low three
   bits of state->__count are cleared.
   NOTE(review): the return type, braces, else lines and final return are
   absent from this listing -- confirm against the complete file.  */
365 __attribute ((always_inline
))
366 ucs4_internal_loop_single (struct __gconv_step
*step
,
367 struct __gconv_step_data
*step_data
,
368 const unsigned char **inptrp
,
369 const unsigned char *inend
,
370 unsigned char **outptrp
, unsigned char *outend
,
371 size_t *irreversible
)
373 mbstate_t *state
= step_data
->__statep
;
374 int flags
= step_data
->__flags
;
/* Number of bytes already buffered in the state.  */
375 size_t cnt
= state
->__count
& 7;
377 while (*inptrp
< inend
&& cnt
< 4)
378 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
380 if (__builtin_expect (cnt
< 4, 0))
382 /* Still not enough bytes. Store the ones in the input buffer. */
383 state
->__count
&= ~7;
384 state
->__count
|= cnt
;
386 return __GCONV_INCOMPLETE_INPUT
;
/* The threshold is 0x7f, not 0x80: a top byte of 0x80 already encodes a
   value above 0x7fffffff, which ucs4_internal_loop rejects through its
   `inval > 0x7fffffff' test.  */
389 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x7f,
392 /* The value is too large. We don't try transliteration here since
393 this is not an error because of the lack of possibilities to
394 represent the result. This is a genuine bug in the input since
395 UCS4 does not allow such values. */
396 if (!(flags
& __GCONV_IGNORE_ERRORS
))
/* Give back the bytes consumed from the input during this call.  */
398 *inptrp
-= cnt
- (state
->__count
& 7);
399 return __GCONV_ILLEGAL_INPUT
;
404 #if __BYTE_ORDER == __LITTLE_ENDIAN
405 (*outptrp
)[0] = state
->__value
.__wchb
[3];
406 (*outptrp
)[1] = state
->__value
.__wchb
[2];
407 (*outptrp
)[2] = state
->__value
.__wchb
[1];
408 (*outptrp
)[3] = state
->__value
.__wchb
[0];
409 #elif __BYTE_ORDER == __BIG_ENDIAN
410 (*outptrp
)[0] = state
->__value
.__wchb
[0];
411 (*outptrp
)[1] = state
->__value
.__wchb
[1];
412 (*outptrp
)[2] = state
->__value
.__wchb
[2];
413 (*outptrp
)[3] = state
->__value
.__wchb
[3];
419 /* Clear the state buffer. */
420 state
->__count
&= ~7;
425 #include <iconv/skeleton.c>
428 /* Similarly for the little endian form. */
429 #define DEFINE_INIT 0
430 #define DEFINE_FINI 0
431 #define MIN_NEEDED_FROM 4
432 #define MIN_NEEDED_TO 4
433 #define FROM_DIRECTION 1
434 #define FROM_LOOP internal_ucs4le_loop
435 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
436 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
/* internal_ucs4le_loop: convert whole 4-byte units from the internal
   representation to little-endian UCS4.  It handles
   MIN (inend - inptr, outend - outptr) / 4 units.  On big-endian hosts
   every 32-bit word is passed through bswap_32; on little-endian hosts
   the bytes are copied unchanged with __mempcpy.  The status is
   __GCONV_EMPTY_INPUT when *inptrp equals inend and __GCONV_FULL_OUTPUT
   when fewer than 4 output bytes remain.
   NOTE(review): the return type, braces and several statements of this
   function are absent from this listing; the final
   __GCONV_INCOMPLETE_INPUT assignment presumably belongs to an else
   branch -- confirm against the complete file.  */
440 __attribute ((always_inline
))
441 internal_ucs4le_loop (struct __gconv_step
*step
,
442 struct __gconv_step_data
*step_data
,
443 const unsigned char **inptrp
, const unsigned char *inend
,
444 unsigned char **outptrp
, unsigned char *outend
,
445 size_t *irreversible
)
447 const unsigned char *inptr
= *inptrp
;
448 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
449 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
452 #if __BYTE_ORDER == __BIG_ENDIAN
453 /* Sigh, we have to do some real work. */
455 uint32_t *outptr32
= (uint32_t *) outptr
;
457 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
458 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
459 outptr
= (unsigned char *) outptr32
;
463 #elif __BYTE_ORDER == __LITTLE_ENDIAN
464 /* Simply copy the data. */
465 *inptrp
= inptr
+ n_convert
* 4;
466 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
468 # error "This endianess is not supported."
471 /* Determine the status. */
472 if (*inptrp
== inend
)
473 result
= __GCONV_EMPTY_INPUT
;
474 else if (*outptrp
+ 4 > outend
)
475 result
= __GCONV_FULL_OUTPUT
;
477 result
= __GCONV_INCOMPLETE_INPUT
;
482 #ifndef _STRING_ARCH_unaligned
/* internal_ucs4le_loop_unaligned: byte-access variant of the internal ->
   little-endian UCS4 loop.  It handles
   MIN (inend - inptr, outend - outptr) / 4 units.  On big-endian hosts
   the four bytes of every unit are written in reverse order; on
   little-endian hosts the data is copied with __mempcpy.  The status is
   __GCONV_EMPTY_INPUT when *inptrp equals inend,
   __GCONV_INCOMPLETE_INPUT when fewer than 4 input bytes remain, and
   otherwise __GCONV_FULL_OUTPUT after asserting that fewer than 4 output
   bytes remain.
   NOTE(review): the order of the status tests here differs from
   internal_ucs4_loop_unaligned, which tests the output space first --
   confirm this is intended.  The return type, braces and some statements
   are absent from this listing.  */
484 __attribute ((always_inline
))
485 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
486 struct __gconv_step_data
*step_data
,
487 const unsigned char **inptrp
,
488 const unsigned char *inend
,
489 unsigned char **outptrp
, unsigned char *outend
,
490 size_t *irreversible
)
492 const unsigned char *inptr
= *inptrp
;
493 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
494 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
497 # if __BYTE_ORDER == __BIG_ENDIAN
498 /* Sigh, we have to do some real work. */
501 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
/* Byte-wise reversal of one unit.  */
503 outptr
[0] = inptr
[3];
504 outptr
[1] = inptr
[2];
505 outptr
[2] = inptr
[1];
506 outptr
[3] = inptr
[0];
511 # elif __BYTE_ORDER == __LITTLE_ENDIAN
512 /* Simply copy the data. */
513 *inptrp
= inptr
+ n_convert
* 4;
514 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
516 # error "This endianess is not supported."
519 /* Determine the status. */
520 if (*inptrp
== inend
)
521 result
= __GCONV_EMPTY_INPUT
;
522 else if (*inptrp
+ 4 > inend
)
523 result
= __GCONV_INCOMPLETE_INPUT
;
526 assert (*outptrp
+ 4 > outend
);
527 result
= __GCONV_FULL_OUTPUT
;
/* internal_ucs4le_loop_single: complete one unit whose bytes are buffered
   in the conversion state.  Input bytes are appended to
   state->__value.__wchb, starting at index (state->__count & 7), until
   four bytes are present or the input is exhausted.  With fewer than four
   bytes the new count is stored in the low three bits of state->__count
   and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the four buffered
   bytes are written to *outptrp, reversed on big-endian hosts, and the
   low three bits of state->__count are cleared.
   NOTE(review): the return type, braces, #else/#endif lines, pointer
   updates and final return are absent from this listing; the in-order
   copy presumably forms the #else branch -- confirm against the complete
   file.  */
536 __attribute ((always_inline
))
537 internal_ucs4le_loop_single (struct __gconv_step
*step
,
538 struct __gconv_step_data
*step_data
,
539 const unsigned char **inptrp
,
540 const unsigned char *inend
,
541 unsigned char **outptrp
, unsigned char *outend
,
542 size_t *irreversible
)
544 mbstate_t *state
= step_data
->__statep
;
/* Number of bytes already buffered in the state.  */
545 size_t cnt
= state
->__count
& 7;
547 while (*inptrp
< inend
&& cnt
< 4)
548 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
550 if (__builtin_expect (cnt
< 4, 0))
552 /* Still not enough bytes. Store the ones in the input buffer. */
553 state
->__count
&= ~7;
554 state
->__count
|= cnt
;
556 return __GCONV_INCOMPLETE_INPUT
;
559 #if __BYTE_ORDER == __BIG_ENDIAN
560 (*outptrp
)[0] = state
->__value
.__wchb
[3];
561 (*outptrp
)[1] = state
->__value
.__wchb
[2];
562 (*outptrp
)[2] = state
->__value
.__wchb
[1];
563 (*outptrp
)[3] = state
->__value
.__wchb
[0];
567 (*outptrp
)[0] = state
->__value
.__wchb
[0];
568 (*outptrp
)[1] = state
->__value
.__wchb
[1];
569 (*outptrp
)[2] = state
->__value
.__wchb
[2];
570 (*outptrp
)[3] = state
->__value
.__wchb
[3];
576 /* Clear the state buffer. */
577 state
->__count
&= ~7;
582 #include <iconv/skeleton.c>
585 /* And finally from UCS4-LE to the internal encoding. */
586 #define DEFINE_INIT 0
587 #define DEFINE_FINI 0
588 #define MIN_NEEDED_FROM 4
589 #define MIN_NEEDED_TO 4
590 #define FROM_DIRECTION 1
591 #define FROM_LOOP ucs4le_internal_loop
592 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
593 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
/* ucs4le_internal_loop: convert whole 4-byte little-endian UCS4 units to
   the internal representation, validating each value.  Every unit is
   loaded as a 32-bit word (byte-swapped with bswap_32 on big-endian
   hosts) and rejected when it exceeds 0x7fffffff: __GCONV_ILLEGAL_INPUT
   is returned when irreversible is NULL, the __GCONV_IGNORE_ERRORS flag
   is consulted, and otherwise __GCONV_ILLEGAL_INPUT is returned.
   Accepted values are stored as a uint32_t and outptr advances by
   sizeof (uint32_t).  The status is __GCONV_EMPTY_INPUT when *inptrp
   equals inend, __GCONV_INCOMPLETE_INPUT when fewer than 4 input bytes
   remain, and otherwise __GCONV_FULL_OUTPUT after asserting that fewer
   than 4 output bytes remain.
   NOTE(review): the return type, braces, the body of the ignore-errors
   branch, the pointer write-backs and the #else/#endif lines are absent
   from this listing; the unswapped load presumably forms the #else
   branch -- confirm against the complete file.  */
597 __attribute ((always_inline
))
598 ucs4le_internal_loop (struct __gconv_step
*step
,
599 struct __gconv_step_data
*step_data
,
600 const unsigned char **inptrp
, const unsigned char *inend
,
601 unsigned char **outptrp
, unsigned char *outend
,
602 size_t *irreversible
)
604 int flags
= step_data
->__flags
;
605 const unsigned char *inptr
= *inptrp
;
606 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
607 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
611 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
615 #if __BYTE_ORDER == __BIG_ENDIAN
616 inval
= bswap_32 (*(const uint32_t *) inptr
);
618 inval
= *(const uint32_t *) inptr
;
621 if (__builtin_expect (inval
> 0x7fffffff, 0))
623 /* The value is too large. We don't try transliteration here since
624 this is not an error because of the lack of possibilities to
625 represent the result. This is a genuine bug in the input since
626 UCS4 does not allow such values. */
627 if (irreversible
== NULL
)
628 /* We are transliterating, don't try to correct anything. */
629 return __GCONV_ILLEGAL_INPUT
;
631 if (flags
& __GCONV_IGNORE_ERRORS
)
633 /* Just ignore this character. */
638 return __GCONV_ILLEGAL_INPUT
;
641 *((uint32_t *) outptr
) = inval
;
642 outptr
+= sizeof (uint32_t);
648 /* Determine the status. */
649 if (*inptrp
== inend
)
650 result
= __GCONV_EMPTY_INPUT
;
651 else if (*inptrp
+ 4 > inend
)
652 result
= __GCONV_INCOMPLETE_INPUT
;
655 assert (*outptrp
+ 4 > outend
);
656 result
= __GCONV_FULL_OUTPUT
;
662 #ifndef _STRING_ARCH_unaligned
/* ucs4le_internal_loop_unaligned: byte-access variant of the
   little-endian UCS4 -> internal loop.  It handles
   MIN (inend - inptr, outend - outptr) / 4 units.  A unit is rejected
   when its fourth byte, the most significant one in little-endian UCS4,
   is above 0x7f, i.e. when the value exceeds 0x7fffffff:
   __GCONV_ILLEGAL_INPUT is returned when irreversible is NULL, the
   __GCONV_IGNORE_ERRORS flag is consulted, and otherwise
   __GCONV_ILLEGAL_INPUT is returned.  Accepted units are copied byte by
   byte, reversed on big-endian hosts.  The status is __GCONV_EMPTY_INPUT
   when *inptrp equals inend, __GCONV_INCOMPLETE_INPUT when fewer than 4
   input bytes remain, and otherwise __GCONV_FULL_OUTPUT after asserting
   that fewer than 4 output bytes remain.
   NOTE(review): the return type, braces, the body of the ignore-errors
   branch, the pointer write-backs and the #else/#endif lines are absent
   from this listing -- confirm against the complete file.  */
664 __attribute ((always_inline
))
665 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
666 struct __gconv_step_data
*step_data
,
667 const unsigned char **inptrp
,
668 const unsigned char *inend
,
669 unsigned char **outptrp
, unsigned char *outend
,
670 size_t *irreversible
)
672 int flags
= step_data
->__flags
;
673 const unsigned char *inptr
= *inptrp
;
674 unsigned char *outptr
= *outptrp
;
/* Count of complete 4-byte units that fit in both buffers.  */
675 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
679 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
/* The threshold is 0x7f, not 0x80: a top byte of 0x80 already encodes a
   value above 0x7fffffff, which ucs4le_internal_loop rejects through its
   `inval > 0x7fffffff' test.  */
681 if (__builtin_expect (inptr
[3] > 0x7f, 0))
683 /* The value is too large. We don't try transliteration here since
684 this is not an error because of the lack of possibilities to
685 represent the result. This is a genuine bug in the input since
686 UCS4 does not allow such values. */
687 if (irreversible
== NULL
)
688 /* We are transliterating, don't try to correct anything. */
689 return __GCONV_ILLEGAL_INPUT
;
691 if (flags
& __GCONV_IGNORE_ERRORS
)
693 /* Just ignore this character. */
700 return __GCONV_ILLEGAL_INPUT
;
703 # if __BYTE_ORDER == __BIG_ENDIAN
704 outptr
[3] = inptr
[0];
705 outptr
[2] = inptr
[1];
706 outptr
[1] = inptr
[2];
707 outptr
[0] = inptr
[3];
709 outptr
[0] = inptr
[0];
710 outptr
[1] = inptr
[1];
711 outptr
[2] = inptr
[2];
712 outptr
[3] = inptr
[3];
721 /* Determine the status. */
722 if (*inptrp
== inend
)
723 result
= __GCONV_EMPTY_INPUT
;
724 else if (*inptrp
+ 4 > inend
)
725 result
= __GCONV_INCOMPLETE_INPUT
;
728 assert (*outptrp
+ 4 > outend
);
729 result
= __GCONV_FULL_OUTPUT
;
/* ucs4le_internal_loop_single: complete one little-endian UCS4 unit whose
   bytes are buffered in the conversion state.  Input bytes are appended
   to state->__value.__wchb, starting at index (state->__count & 7), until
   four bytes are present or the input is exhausted.  With fewer than four
   bytes the count is stored in the low three bits of state->__count and
   __GCONV_INCOMPLETE_INPUT is returned.  A unit whose fourth byte is
   above 0x7f (value above 0x7fffffff) is invalid and, unless
   __GCONV_IGNORE_ERRORS is set, __GCONV_ILLEGAL_INPUT is returned.
   Otherwise the bytes are written to *outptrp, reversed on big-endian
   hosts, and the low three bits of state->__count are cleared.
   NOTE(review): unlike ucs4_internal_loop_single, the visible error path
   does not move *inptrp back before returning -- confirm whether that is
   intended.  The return type, braces, #else/#endif lines and final return
   are absent from this listing.  */
738 __attribute ((always_inline
))
739 ucs4le_internal_loop_single (struct __gconv_step
*step
,
740 struct __gconv_step_data
*step_data
,
741 const unsigned char **inptrp
,
742 const unsigned char *inend
,
743 unsigned char **outptrp
, unsigned char *outend
,
744 size_t *irreversible
)
746 mbstate_t *state
= step_data
->__statep
;
747 int flags
= step_data
->__flags
;
/* Number of bytes already buffered in the state.  */
748 size_t cnt
= state
->__count
& 7;
750 while (*inptrp
< inend
&& cnt
< 4)
751 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
753 if (__builtin_expect (cnt
< 4, 0))
755 /* Still not enough bytes. Store the ones in the input buffer. */
756 state
->__count
&= ~7;
757 state
->__count
|= cnt
;
759 return __GCONV_INCOMPLETE_INPUT
;
/* The threshold is 0x7f, not 0x80: a top byte of 0x80 already encodes a
   value above 0x7fffffff, which ucs4le_internal_loop rejects through its
   `inval > 0x7fffffff' test.  */
762 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x7f,
765 /* The value is too large. We don't try transliteration here since
766 this is not an error because of the lack of possibilities to
767 represent the result. This is a genuine bug in the input since
768 UCS4 does not allow such values. */
769 if (!(flags
& __GCONV_IGNORE_ERRORS
))
770 return __GCONV_ILLEGAL_INPUT
;
774 #if __BYTE_ORDER == __BIG_ENDIAN
775 (*outptrp
)[0] = state
->__value
.__wchb
[3];
776 (*outptrp
)[1] = state
->__value
.__wchb
[2];
777 (*outptrp
)[2] = state
->__value
.__wchb
[1];
778 (*outptrp
)[3] = state
->__value
.__wchb
[0];
780 (*outptrp
)[0] = state
->__value
.__wchb
[0];
781 (*outptrp
)[1] = state
->__value
.__wchb
[1];
782 (*outptrp
)[2] = state
->__value
.__wchb
[2];
783 (*outptrp
)[3] = state
->__value
.__wchb
[3];
789 /* Clear the state buffer. */
790 state
->__count
&= ~7;
795 #include <iconv/skeleton.c>
798 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
799 #define DEFINE_INIT 0
800 #define DEFINE_FINI 0
801 #define MIN_NEEDED_FROM 1
802 #define MIN_NEEDED_TO 4
803 #define FROM_DIRECTION 1
804 #define FROM_LOOP ascii_internal_loop
805 #define TO_LOOP ascii_internal_loop /* This is not used. */
806 #define FUNCTION_NAME __gconv_transform_ascii_internal
807 #define ONE_DIRECTION 1
809 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
810 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
811 #define LOOPFCT FROM_LOOP
814 if (__builtin_expect (*inptr > '\x7f', 0)) \
816 /* The value is too large. We don't try transliteration here since \
817 this is not an error because of the lack of possibilities to \
818 represent the result. This is a genuine bug in the input since \
819 ASCII does not allow such values. */ \
820 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
823 /* It's a one-byte sequence. */ \
824 *((uint32_t *) outptr) = *inptr++; \
825 outptr += sizeof (uint32_t); \
827 #define LOOP_NEED_FLAGS
828 #include <iconv/loop.c>
829 #include <iconv/skeleton.c>
832 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
833 #define DEFINE_INIT 0
834 #define DEFINE_FINI 0
835 #define MIN_NEEDED_FROM 4
836 #define MIN_NEEDED_TO 1
837 #define FROM_DIRECTION 1
838 #define FROM_LOOP internal_ascii_loop
839 #define TO_LOOP internal_ascii_loop /* This is not used. */
840 #define FUNCTION_NAME __gconv_transform_internal_ascii
841 #define ONE_DIRECTION 1
843 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
844 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
845 #define LOOPFCT FROM_LOOP
848 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
850 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
851 STANDARD_TO_LOOP_ERR_HANDLER (4); \
854 /* It's a one-byte sequence. */ \
855 *outptr++ = *((const uint32_t *) inptr); \
856 inptr += sizeof (uint32_t); \
858 #define LOOP_NEED_FLAGS
859 #include <iconv/loop.c>
860 #include <iconv/skeleton.c>
863 /* Convert from the internal (UCS4-like) format to UTF-8. */
864 #define DEFINE_INIT 0
865 #define DEFINE_FINI 0
866 #define MIN_NEEDED_FROM 4
867 #define MIN_NEEDED_TO 1
868 #define MAX_NEEDED_TO 6
869 #define FROM_DIRECTION 1
870 #define FROM_LOOP internal_utf8_loop
871 #define TO_LOOP internal_utf8_loop /* This is not used. */
872 #define FUNCTION_NAME __gconv_transform_internal_utf8
873 #define ONE_DIRECTION 1
875 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
876 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
877 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
878 #define LOOPFCT FROM_LOOP
881 uint32_t wc = *((const uint32_t *) inptr); \
884 /* It's a one-byte sequence. */ \
885 *outptr++ = (unsigned char) wc; \
886 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
889 unsigned char *start; \
891 for (step = 2; step < 6; ++step) \
892 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
895 if (__builtin_expect (outptr + step > outend, 0)) \
898 result = __GCONV_FULL_OUTPUT; \
903 *outptr = (unsigned char) (~0xff >> step); \
907 start[--step] = 0x80 | (wc & 0x3f); \
915 STANDARD_TO_LOOP_ERR_HANDLER (4); \
920 #define LOOP_NEED_FLAGS
921 #include <iconv/loop.c>
922 #include <iconv/skeleton.c>
925 /* Convert from UTF-8 to the internal (UCS4-like) format. */
926 #define DEFINE_INIT 0
927 #define DEFINE_FINI 0
928 #define MIN_NEEDED_FROM 1
929 #define MAX_NEEDED_FROM 6
930 #define MIN_NEEDED_TO 4
931 #define FROM_DIRECTION 1
932 #define FROM_LOOP utf8_internal_loop
933 #define TO_LOOP utf8_internal_loop /* This is not used. */
934 #define FUNCTION_NAME __gconv_transform_utf8_internal
935 #define ONE_DIRECTION 1
937 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
938 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
939 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
940 #define LOOPFCT FROM_LOOP
947 /* Next input byte. */ \
952 /* One byte sequence. */ \
958 if (ch >= 0xc2 && ch < 0xe0) \
960 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
961 otherwise the wide character could have been represented \
962 using a single byte. */ \
966 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
968 /* We expect three bytes. */ \
972 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
974 /* We expect four bytes. */ \
978 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
980 /* We expect five bytes. */ \
984 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
986 /* We expect six bytes. */ \
992 /* Search the end of this ill-formed UTF-8 character. This \
993 is the next byte with (x & 0xc0) != 0x80. */ \
997 while (inptr + i < inend \
998 && (*(inptr + i) & 0xc0) == 0x80 \
1002 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1005 if (__builtin_expect (inptr + cnt > inend, 0)) \
1007 /* We don't have enough input. But before we report that check \
1008 that all the bytes are correct. */ \
1009 for (i = 1; inptr + i < inend; ++i) \
1010 if ((inptr[i] & 0xc0) != 0x80) \
1013 if (__builtin_expect (inptr + i == inend, 1)) \
1015 result = __GCONV_INCOMPLETE_INPUT; \
1022 /* Read the possible remaining bytes. */ \
1023 for (i = 1; i < cnt; ++i) \
1025 uint32_t byte = inptr[i]; \
1027 if ((byte & 0xc0) != 0x80) \
1028 /* This is an illegal encoding. */ \
1032 ch |= byte & 0x3f; \
1035 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1036 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1037 have been represented with fewer than cnt bytes. */ \
1038 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1040 /* This is an illegal encoding. */ \
1047 /* Now adjust the pointers and store the result. */ \
1048 *((uint32_t *) outptr) = ch; \
1049 outptr += sizeof (uint32_t); \
1051 #define LOOP_NEED_FLAGS
/* STORE_REST: save an incomplete UTF-8 sequence in the conversion state.
   The visible statements read the lead byte from **inptrp, record the
   number of available input bytes (inend - *inptrp) in state->__count,
   classify the lead byte by its high bits, fold the low six bits of each
   remaining input byte into ch, OR cnt << 8 into state->__count and store
   ch in state->__value.__wch.
   NOTE(review): the statements that set cnt and shift ch are absent from
   this listing -- confirm against the complete file.  */
1053 #define STORE_REST \
1055 /* We store the remaining bytes while converting them into the UCS4 \
1056 format. We can assume that the first byte in the buffer is \
1057 correct and that it requires a larger number of bytes than there \
1058 are in the input buffer. */ \
1059 wint_t ch = **inptrp; \
1062 state->__count = inend - *inptrp; \
1064 if (ch >= 0xc2 && ch < 0xe0) \
1066 /* We expect two bytes. The first byte cannot be 0xc0 or \
1067 0xc1, otherwise the wide character could have been \
1068 represented using a single byte. */ \
1072 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1074 /* We expect three bytes. */ \
1078 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1080 /* We expect four bytes. */ \
1084 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1086 /* We expect five bytes. */ \
1092 /* We expect six bytes. */ \
1097 /* The first byte is already consumed. */ \
1099 while (++(*inptrp) < inend) \
1102 ch |= **inptrp & 0x3f; \
1106 /* Shift for the so far missing bytes. */ \
1109 /* Store the number of bytes expected for the entire sequence. */ \
1110 state->__count |= cnt << 8; \
1112 /* Store the value. */ \
1113 state->__value.__wch = ch; \
/* UNPACK_BYTES: rebuild the bytes of a saved partial UTF-8 sequence from
   the conversion state.  The visible statements take wch from
   state->__value.__wch, ntotal from state->__count >> 8 and inlen from
   state->__count & 255, seed bytebuf[0] with inmask[ntotal - 2], form
   continuation bytes as 0x80 | (wch & 0x3f) for indices below inlen, and
   finally OR the remaining bits of wch into bytebuf[0].
   NOTE(review): the loop head and the shifting of wch between iterations
   are absent from this listing -- confirm against the complete file.  */
1116 #define UNPACK_BYTES \
1118 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1119 wint_t wch = state->__value.__wch; \
1120 size_t ntotal = state->__count >> 8; \
1122 inlen = state->__count & 255; \
1124 bytebuf[0] = inmask[ntotal - 2]; \
1128 if (--ntotal < inlen) \
1129 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1132 while (ntotal > 1); \
1134 bytebuf[0] |= wch; \
1137 #define CLEAR_STATE \
1141 #include <iconv/loop.c>
1142 #include <iconv/skeleton.c>
1145 /* Convert from UCS2 to the internal (UCS4-like) format. */
1146 #define DEFINE_INIT 0
1147 #define DEFINE_FINI 0
1148 #define MIN_NEEDED_FROM 2
1149 #define MIN_NEEDED_TO 4
1150 #define FROM_DIRECTION 1
1151 #define FROM_LOOP ucs2_internal_loop
1152 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1153 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1154 #define ONE_DIRECTION 1
1156 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1157 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1158 #define LOOPFCT FROM_LOOP
1161 uint16_t u1 = get16 (inptr); \
1163 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1165 /* Surrogate characters in UCS-2 input are not valid. Reject \
1166 them. (Catching this here is not security relevant.) */ \
1167 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1170 *((uint32_t *) outptr) = u1; \
1171 outptr += sizeof (uint32_t); \
1174 #define LOOP_NEED_FLAGS
1175 #include <iconv/loop.c>
1176 #include <iconv/skeleton.c>
1179 /* Convert from the internal (UCS4-like) format to UCS2. */
1180 #define DEFINE_INIT 0
1181 #define DEFINE_FINI 0
1182 #define MIN_NEEDED_FROM 4
1183 #define MIN_NEEDED_TO 2
1184 #define FROM_DIRECTION 1
1185 #define FROM_LOOP internal_ucs2_loop
1186 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1187 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1188 #define ONE_DIRECTION 1
1190 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1191 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1192 #define LOOPFCT FROM_LOOP
1195 uint32_t val = *((const uint32_t *) inptr); \
1197 if (__builtin_expect (val >= 0x10000, 0)) \
1199 UNICODE_TAG_HANDLER (val, 4); \
1200 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1202 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1204 /* Surrogate characters in UCS-4 input are not valid. \
1205 We must catch this, because the UCS-2 output might be \
1206 interpreted as UTF-16 by other programs. If we let \
1207 surrogates pass through, attackers could make a security \
1208 hole exploit by synthesizing any desired plane 1-16 \
1210 result = __GCONV_ILLEGAL_INPUT; \
1211 if (! ignore_errors_p ()) \
1219 put16 (outptr, val); \
1220 outptr += sizeof (uint16_t); \
1224 #define LOOP_NEED_FLAGS
1225 #include <iconv/loop.c>
1226 #include <iconv/skeleton.c>
1229 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1230 #define DEFINE_INIT 0
1231 #define DEFINE_FINI 0
1232 #define MIN_NEEDED_FROM 2
1233 #define MIN_NEEDED_TO 4
1234 #define FROM_DIRECTION 1
1235 #define FROM_LOOP ucs2reverse_internal_loop
1236 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1237 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1238 #define ONE_DIRECTION 1
1240 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1241 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1242 #define LOOPFCT FROM_LOOP
1245 uint16_t u1 = bswap_16 (get16 (inptr)); \
1247 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1249 /* Surrogate characters in UCS-2 input are not valid. Reject \
1250 them. (Catching this here is not security relevant.) */ \
1251 if (! ignore_errors_p ()) \
1253 result = __GCONV_ILLEGAL_INPUT; \
1261 *((uint32_t *) outptr) = u1; \
1262 outptr += sizeof (uint32_t); \
1265 #define LOOP_NEED_FLAGS
1266 #include <iconv/loop.c>
1267 #include <iconv/skeleton.c>
1270 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1271 #define DEFINE_INIT 0
1272 #define DEFINE_FINI 0
1273 #define MIN_NEEDED_FROM 4
1274 #define MIN_NEEDED_TO 2
1275 #define FROM_DIRECTION 1
1276 #define FROM_LOOP internal_ucs2reverse_loop
1277 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1278 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1279 #define ONE_DIRECTION 1
1281 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1282 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1283 #define LOOPFCT FROM_LOOP
1286 uint32_t val = *((const uint32_t *) inptr); \
1287 if (__builtin_expect (val >= 0x10000, 0)) \
1289 UNICODE_TAG_HANDLER (val, 4); \
1290 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1292 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1294 /* Surrogate characters in UCS-4 input are not valid. \
1295 We must catch this, because the UCS-2 output might be \
1296 interpreted as UTF-16 by other programs. If we let \
1297 surrogates pass through, attackers could make a security \
1298 hole exploit by synthesizing any desired plane 1-16 \
1300 if (! ignore_errors_p ()) \
1302 result = __GCONV_ILLEGAL_INPUT; \
1311 put16 (outptr, bswap_16 (val)); \
1312 outptr += sizeof (uint16_t); \
1316 #define LOOP_NEED_FLAGS
1317 #include <iconv/loop.c>
1318 #include <iconv/skeleton.c>