1 /* Simple transformations functions.
2 Copyright (C) 1997-2005, 2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 #include <sys/param.h>
31 #include <gconv_int.h>
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
43 # define EILSEQ EINVAL
47 /* Specialized conversion function for a single byte to INTERNAL, recognizing
48 only ASCII characters. */
50 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal UCS4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 standard
   says that unless some higher protocol specifies it differently,
   the byte order is big endian.  */
66 #define MIN_NEEDED_FROM 4
67 #define MIN_NEEDED_TO 4
68 #define FROM_DIRECTION 1
69 #define FROM_LOOP internal_ucs4_loop
70 #define TO_LOOP internal_ucs4_loop /* This is not used. */
71 #define FUNCTION_NAME __gconv_transform_internal_ucs4
75 __attribute ((always_inline
))
76 internal_ucs4_loop (struct __gconv_step
*step
,
77 struct __gconv_step_data
*step_data
,
78 const unsigned char **inptrp
, const unsigned char *inend
,
79 unsigned char **outptrp
, unsigned char *outend
,
82 const unsigned char *inptr
= *inptrp
;
83 unsigned char *outptr
= *outptrp
;
84 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
87 #if __BYTE_ORDER == __LITTLE_ENDIAN
88 /* Sigh, we have to do some real work. */
90 uint32_t *outptr32
= (uint32_t *) outptr
;
92 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
93 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
96 *outptrp
= (unsigned char *) outptr32
;
97 #elif __BYTE_ORDER == __BIG_ENDIAN
98 /* Simply copy the data. */
99 *inptrp
= inptr
+ n_convert
* 4;
100 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
102 # error "This endianess is not supported."
105 /* Determine the status. */
106 if (*inptrp
== inend
)
107 result
= __GCONV_EMPTY_INPUT
;
108 else if (*outptrp
+ 4 > outend
)
109 result
= __GCONV_FULL_OUTPUT
;
111 result
= __GCONV_INCOMPLETE_INPUT
;
116 #ifndef _STRING_ARCH_unaligned
118 __attribute ((always_inline
))
119 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
120 struct __gconv_step_data
*step_data
,
121 const unsigned char **inptrp
,
122 const unsigned char *inend
,
123 unsigned char **outptrp
, unsigned char *outend
,
124 size_t *irreversible
)
126 const unsigned char *inptr
= *inptrp
;
127 unsigned char *outptr
= *outptrp
;
128 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
131 # if __BYTE_ORDER == __LITTLE_ENDIAN
132 /* Sigh, we have to do some real work. */
135 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
137 outptr
[0] = inptr
[3];
138 outptr
[1] = inptr
[2];
139 outptr
[2] = inptr
[1];
140 outptr
[3] = inptr
[0];
145 # elif __BYTE_ORDER == __BIG_ENDIAN
146 /* Simply copy the data. */
147 *inptrp
= inptr
+ n_convert
* 4;
148 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
150 # error "This endianess is not supported."
153 /* Determine the status. */
154 if (*inptrp
== inend
)
155 result
= __GCONV_EMPTY_INPUT
;
156 else if (*outptrp
+ 4 > outend
)
157 result
= __GCONV_FULL_OUTPUT
;
159 result
= __GCONV_INCOMPLETE_INPUT
;
167 __attribute ((always_inline
))
168 internal_ucs4_loop_single (struct __gconv_step
*step
,
169 struct __gconv_step_data
*step_data
,
170 const unsigned char **inptrp
,
171 const unsigned char *inend
,
172 unsigned char **outptrp
, unsigned char *outend
,
173 size_t *irreversible
)
175 mbstate_t *state
= step_data
->__statep
;
176 size_t cnt
= state
->__count
& 7;
178 while (*inptrp
< inend
&& cnt
< 4)
179 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
181 if (__builtin_expect (cnt
< 4, 0))
183 /* Still not enough bytes. Store the ones in the input buffer. */
184 state
->__count
&= ~7;
185 state
->__count
|= cnt
;
187 return __GCONV_INCOMPLETE_INPUT
;
190 #if __BYTE_ORDER == __LITTLE_ENDIAN
191 (*outptrp
)[0] = state
->__value
.__wchb
[3];
192 (*outptrp
)[1] = state
->__value
.__wchb
[2];
193 (*outptrp
)[2] = state
->__value
.__wchb
[1];
194 (*outptrp
)[3] = state
->__value
.__wchb
[0];
196 #elif __BYTE_ORDER == __BIG_ENDIAN
198 (*outptrp
)[0] = state
->__value
.__wchb
[0];
199 (*outptrp
)[1] = state
->__value
.__wchb
[1];
200 (*outptrp
)[2] = state
->__value
.__wchb
[2];
201 (*outptrp
)[3] = state
->__value
.__wchb
[3];
203 # error "This endianess is not supported."
207 /* Clear the state buffer. */
208 state
->__count
&= ~7;
213 #include <iconv/skeleton.c>
216 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
217 for the other direction we have to check for correct values here. */
218 #define DEFINE_INIT 0
219 #define DEFINE_FINI 0
220 #define MIN_NEEDED_FROM 4
221 #define MIN_NEEDED_TO 4
222 #define FROM_DIRECTION 1
223 #define FROM_LOOP ucs4_internal_loop
224 #define TO_LOOP ucs4_internal_loop /* This is not used. */
225 #define FUNCTION_NAME __gconv_transform_ucs4_internal
229 __attribute ((always_inline
))
230 ucs4_internal_loop (struct __gconv_step
*step
,
231 struct __gconv_step_data
*step_data
,
232 const unsigned char **inptrp
, const unsigned char *inend
,
233 unsigned char **outptrp
, unsigned char *outend
,
234 size_t *irreversible
)
236 int flags
= step_data
->__flags
;
237 const unsigned char *inptr
= *inptrp
;
238 unsigned char *outptr
= *outptrp
;
239 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
243 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
247 #if __BYTE_ORDER == __LITTLE_ENDIAN
248 inval
= bswap_32 (*(const uint32_t *) inptr
);
250 inval
= *(const uint32_t *) inptr
;
253 if (__builtin_expect (inval
> 0x7fffffff, 0))
255 /* The value is too large. We don't try transliteration here since
256 this is not an error because of the lack of possibilities to
257 represent the result. This is a genuine bug in the input since
258 UCS4 does not allow such values. */
259 if (irreversible
== NULL
)
260 /* We are transliterating, don't try to correct anything. */
261 return __GCONV_ILLEGAL_INPUT
;
263 if (flags
& __GCONV_IGNORE_ERRORS
)
265 /* Just ignore this character. */
272 return __GCONV_ILLEGAL_INPUT
;
275 *((uint32_t *) outptr
) = inval
;
276 outptr
+= sizeof (uint32_t);
282 /* Determine the status. */
283 if (*inptrp
== inend
)
284 result
= __GCONV_EMPTY_INPUT
;
285 else if (*outptrp
+ 4 > outend
)
286 result
= __GCONV_FULL_OUTPUT
;
288 result
= __GCONV_INCOMPLETE_INPUT
;
293 #ifndef _STRING_ARCH_unaligned
295 __attribute ((always_inline
))
296 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
297 struct __gconv_step_data
*step_data
,
298 const unsigned char **inptrp
,
299 const unsigned char *inend
,
300 unsigned char **outptrp
, unsigned char *outend
,
301 size_t *irreversible
)
303 int flags
= step_data
->__flags
;
304 const unsigned char *inptr
= *inptrp
;
305 unsigned char *outptr
= *outptrp
;
306 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
310 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
312 if (__builtin_expect (inptr
[0] > 0x80, 0))
314 /* The value is too large. We don't try transliteration here since
315 this is not an error because of the lack of possibilities to
316 represent the result. This is a genuine bug in the input since
317 UCS4 does not allow such values. */
318 if (irreversible
== NULL
)
319 /* We are transliterating, don't try to correct anything. */
320 return __GCONV_ILLEGAL_INPUT
;
322 if (flags
& __GCONV_IGNORE_ERRORS
)
324 /* Just ignore this character. */
331 return __GCONV_ILLEGAL_INPUT
;
334 # if __BYTE_ORDER == __LITTLE_ENDIAN
335 outptr
[3] = inptr
[0];
336 outptr
[2] = inptr
[1];
337 outptr
[1] = inptr
[2];
338 outptr
[0] = inptr
[3];
340 outptr
[0] = inptr
[0];
341 outptr
[1] = inptr
[1];
342 outptr
[2] = inptr
[2];
343 outptr
[3] = inptr
[3];
351 /* Determine the status. */
352 if (*inptrp
== inend
)
353 result
= __GCONV_EMPTY_INPUT
;
354 else if (*outptrp
+ 4 > outend
)
355 result
= __GCONV_FULL_OUTPUT
;
357 result
= __GCONV_INCOMPLETE_INPUT
;
365 __attribute ((always_inline
))
366 ucs4_internal_loop_single (struct __gconv_step
*step
,
367 struct __gconv_step_data
*step_data
,
368 const unsigned char **inptrp
,
369 const unsigned char *inend
,
370 unsigned char **outptrp
, unsigned char *outend
,
371 size_t *irreversible
)
373 mbstate_t *state
= step_data
->__statep
;
374 int flags
= step_data
->__flags
;
375 size_t cnt
= state
->__count
& 7;
377 while (*inptrp
< inend
&& cnt
< 4)
378 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
380 if (__builtin_expect (cnt
< 4, 0))
382 /* Still not enough bytes. Store the ones in the input buffer. */
383 state
->__count
&= ~7;
384 state
->__count
|= cnt
;
386 return __GCONV_INCOMPLETE_INPUT
;
389 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x80,
392 /* The value is too large. We don't try transliteration here since
393 this is not an error because of the lack of possibilities to
394 represent the result. This is a genuine bug in the input since
395 UCS4 does not allow such values. */
396 if (!(flags
& __GCONV_IGNORE_ERRORS
))
398 *inptrp
-= cnt
- (state
->__count
& 7);
399 return __GCONV_ILLEGAL_INPUT
;
404 #if __BYTE_ORDER == __LITTLE_ENDIAN
405 (*outptrp
)[0] = state
->__value
.__wchb
[3];
406 (*outptrp
)[1] = state
->__value
.__wchb
[2];
407 (*outptrp
)[2] = state
->__value
.__wchb
[1];
408 (*outptrp
)[3] = state
->__value
.__wchb
[0];
409 #elif __BYTE_ORDER == __BIG_ENDIAN
410 (*outptrp
)[0] = state
->__value
.__wchb
[0];
411 (*outptrp
)[1] = state
->__value
.__wchb
[1];
412 (*outptrp
)[2] = state
->__value
.__wchb
[2];
413 (*outptrp
)[3] = state
->__value
.__wchb
[3];
419 /* Clear the state buffer. */
420 state
->__count
&= ~7;
425 #include <iconv/skeleton.c>
428 /* Similarly for the little endian form. */
429 #define DEFINE_INIT 0
430 #define DEFINE_FINI 0
431 #define MIN_NEEDED_FROM 4
432 #define MIN_NEEDED_TO 4
433 #define FROM_DIRECTION 1
434 #define FROM_LOOP internal_ucs4le_loop
435 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
436 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
440 __attribute ((always_inline
))
441 internal_ucs4le_loop (struct __gconv_step
*step
,
442 struct __gconv_step_data
*step_data
,
443 const unsigned char **inptrp
, const unsigned char *inend
,
444 unsigned char **outptrp
, unsigned char *outend
,
445 size_t *irreversible
)
447 const unsigned char *inptr
= *inptrp
;
448 unsigned char *outptr
= *outptrp
;
449 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
452 #if __BYTE_ORDER == __BIG_ENDIAN
453 /* Sigh, we have to do some real work. */
455 uint32_t *outptr32
= (uint32_t *) outptr
;
457 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
458 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
459 outptr
= (unsigned char *) outptr32
;
463 #elif __BYTE_ORDER == __LITTLE_ENDIAN
464 /* Simply copy the data. */
465 *inptrp
= inptr
+ n_convert
* 4;
466 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
468 # error "This endianess is not supported."
471 /* Determine the status. */
472 if (*inptrp
== inend
)
473 result
= __GCONV_EMPTY_INPUT
;
474 else if (*outptrp
+ 4 > outend
)
475 result
= __GCONV_FULL_OUTPUT
;
477 result
= __GCONV_INCOMPLETE_INPUT
;
482 #ifndef _STRING_ARCH_unaligned
484 __attribute ((always_inline
))
485 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
486 struct __gconv_step_data
*step_data
,
487 const unsigned char **inptrp
,
488 const unsigned char *inend
,
489 unsigned char **outptrp
, unsigned char *outend
,
490 size_t *irreversible
)
492 const unsigned char *inptr
= *inptrp
;
493 unsigned char *outptr
= *outptrp
;
494 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
497 # if __BYTE_ORDER == __BIG_ENDIAN
498 /* Sigh, we have to do some real work. */
501 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
503 outptr
[0] = inptr
[3];
504 outptr
[1] = inptr
[2];
505 outptr
[2] = inptr
[1];
506 outptr
[3] = inptr
[0];
511 # elif __BYTE_ORDER == __LITTLE_ENDIAN
512 /* Simply copy the data. */
513 *inptrp
= inptr
+ n_convert
* 4;
514 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
516 # error "This endianess is not supported."
519 /* Determine the status. */
520 if (*inptrp
== inend
)
521 result
= __GCONV_EMPTY_INPUT
;
522 else if (*inptrp
+ 4 > inend
)
523 result
= __GCONV_INCOMPLETE_INPUT
;
526 assert (*outptrp
+ 4 > outend
);
527 result
= __GCONV_FULL_OUTPUT
;
536 __attribute ((always_inline
))
537 internal_ucs4le_loop_single (struct __gconv_step
*step
,
538 struct __gconv_step_data
*step_data
,
539 const unsigned char **inptrp
,
540 const unsigned char *inend
,
541 unsigned char **outptrp
, unsigned char *outend
,
542 size_t *irreversible
)
544 mbstate_t *state
= step_data
->__statep
;
545 size_t cnt
= state
->__count
& 7;
547 while (*inptrp
< inend
&& cnt
< 4)
548 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
550 if (__builtin_expect (cnt
< 4, 0))
552 /* Still not enough bytes. Store the ones in the input buffer. */
553 state
->__count
&= ~7;
554 state
->__count
|= cnt
;
556 return __GCONV_INCOMPLETE_INPUT
;
559 #if __BYTE_ORDER == __BIG_ENDIAN
560 (*outptrp
)[0] = state
->__value
.__wchb
[3];
561 (*outptrp
)[1] = state
->__value
.__wchb
[2];
562 (*outptrp
)[2] = state
->__value
.__wchb
[1];
563 (*outptrp
)[3] = state
->__value
.__wchb
[0];
567 (*outptrp
)[0] = state
->__value
.__wchb
[0];
568 (*outptrp
)[1] = state
->__value
.__wchb
[1];
569 (*outptrp
)[2] = state
->__value
.__wchb
[2];
570 (*outptrp
)[3] = state
->__value
.__wchb
[3];
576 /* Clear the state buffer. */
577 state
->__count
&= ~7;
582 #include <iconv/skeleton.c>
585 /* And finally from UCS4-LE to the internal encoding. */
586 #define DEFINE_INIT 0
587 #define DEFINE_FINI 0
588 #define MIN_NEEDED_FROM 4
589 #define MIN_NEEDED_TO 4
590 #define FROM_DIRECTION 1
591 #define FROM_LOOP ucs4le_internal_loop
592 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
593 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
597 __attribute ((always_inline
))
598 ucs4le_internal_loop (struct __gconv_step
*step
,
599 struct __gconv_step_data
*step_data
,
600 const unsigned char **inptrp
, const unsigned char *inend
,
601 unsigned char **outptrp
, unsigned char *outend
,
602 size_t *irreversible
)
604 int flags
= step_data
->__flags
;
605 const unsigned char *inptr
= *inptrp
;
606 unsigned char *outptr
= *outptrp
;
607 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
611 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
615 #if __BYTE_ORDER == __BIG_ENDIAN
616 inval
= bswap_32 (*(const uint32_t *) inptr
);
618 inval
= *(const uint32_t *) inptr
;
621 if (__builtin_expect (inval
> 0x7fffffff, 0))
623 /* The value is too large. We don't try transliteration here since
624 this is not an error because of the lack of possibilities to
625 represent the result. This is a genuine bug in the input since
626 UCS4 does not allow such values. */
627 if (irreversible
== NULL
)
628 /* We are transliterating, don't try to correct anything. */
629 return __GCONV_ILLEGAL_INPUT
;
631 if (flags
& __GCONV_IGNORE_ERRORS
)
633 /* Just ignore this character. */
638 return __GCONV_ILLEGAL_INPUT
;
641 *((uint32_t *) outptr
) = inval
;
642 outptr
+= sizeof (uint32_t);
648 /* Determine the status. */
649 if (*inptrp
== inend
)
650 result
= __GCONV_EMPTY_INPUT
;
651 else if (*inptrp
+ 4 > inend
)
652 result
= __GCONV_INCOMPLETE_INPUT
;
655 assert (*outptrp
+ 4 > outend
);
656 result
= __GCONV_FULL_OUTPUT
;
662 #ifndef _STRING_ARCH_unaligned
664 __attribute ((always_inline
))
665 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
666 struct __gconv_step_data
*step_data
,
667 const unsigned char **inptrp
,
668 const unsigned char *inend
,
669 unsigned char **outptrp
, unsigned char *outend
,
670 size_t *irreversible
)
672 int flags
= step_data
->__flags
;
673 const unsigned char *inptr
= *inptrp
;
674 unsigned char *outptr
= *outptrp
;
675 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
679 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
681 if (__builtin_expect (inptr
[3] > 0x80, 0))
683 /* The value is too large. We don't try transliteration here since
684 this is not an error because of the lack of possibilities to
685 represent the result. This is a genuine bug in the input since
686 UCS4 does not allow such values. */
687 if (irreversible
== NULL
)
688 /* We are transliterating, don't try to correct anything. */
689 return __GCONV_ILLEGAL_INPUT
;
691 if (flags
& __GCONV_IGNORE_ERRORS
)
693 /* Just ignore this character. */
700 return __GCONV_ILLEGAL_INPUT
;
703 # if __BYTE_ORDER == __BIG_ENDIAN
704 outptr
[3] = inptr
[0];
705 outptr
[2] = inptr
[1];
706 outptr
[1] = inptr
[2];
707 outptr
[0] = inptr
[3];
709 outptr
[0] = inptr
[0];
710 outptr
[1] = inptr
[1];
711 outptr
[2] = inptr
[2];
712 outptr
[3] = inptr
[3];
721 /* Determine the status. */
722 if (*inptrp
== inend
)
723 result
= __GCONV_EMPTY_INPUT
;
724 else if (*inptrp
+ 4 > inend
)
725 result
= __GCONV_INCOMPLETE_INPUT
;
728 assert (*outptrp
+ 4 > outend
);
729 result
= __GCONV_FULL_OUTPUT
;
738 __attribute ((always_inline
))
739 ucs4le_internal_loop_single (struct __gconv_step
*step
,
740 struct __gconv_step_data
*step_data
,
741 const unsigned char **inptrp
,
742 const unsigned char *inend
,
743 unsigned char **outptrp
, unsigned char *outend
,
744 size_t *irreversible
)
746 mbstate_t *state
= step_data
->__statep
;
747 int flags
= step_data
->__flags
;
748 size_t cnt
= state
->__count
& 7;
750 while (*inptrp
< inend
&& cnt
< 4)
751 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
753 if (__builtin_expect (cnt
< 4, 0))
755 /* Still not enough bytes. Store the ones in the input buffer. */
756 state
->__count
&= ~7;
757 state
->__count
|= cnt
;
759 return __GCONV_INCOMPLETE_INPUT
;
762 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x80,
765 /* The value is too large. We don't try transliteration here since
766 this is not an error because of the lack of possibilities to
767 represent the result. This is a genuine bug in the input since
768 UCS4 does not allow such values. */
769 if (!(flags
& __GCONV_IGNORE_ERRORS
))
770 return __GCONV_ILLEGAL_INPUT
;
774 #if __BYTE_ORDER == __BIG_ENDIAN
775 (*outptrp
)[0] = state
->__value
.__wchb
[3];
776 (*outptrp
)[1] = state
->__value
.__wchb
[2];
777 (*outptrp
)[2] = state
->__value
.__wchb
[1];
778 (*outptrp
)[3] = state
->__value
.__wchb
[0];
780 (*outptrp
)[0] = state
->__value
.__wchb
[0];
781 (*outptrp
)[1] = state
->__value
.__wchb
[1];
782 (*outptrp
)[2] = state
->__value
.__wchb
[2];
783 (*outptrp
)[3] = state
->__value
.__wchb
[3];
789 /* Clear the state buffer. */
790 state
->__count
&= ~7;
795 #include <iconv/skeleton.c>
798 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
799 #define DEFINE_INIT 0
800 #define DEFINE_FINI 0
801 #define MIN_NEEDED_FROM 1
802 #define MIN_NEEDED_TO 4
803 #define FROM_DIRECTION 1
804 #define FROM_LOOP ascii_internal_loop
805 #define TO_LOOP ascii_internal_loop /* This is not used. */
806 #define FUNCTION_NAME __gconv_transform_ascii_internal
807 #define ONE_DIRECTION 1
809 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
810 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
811 #define LOOPFCT FROM_LOOP
814 if (__builtin_expect (*inptr > '\x7f', 0)) \
816 /* The value is too large. We don't try transliteration here since \
817 this is not an error because of the lack of possibilities to \
818 represent the result. This is a genuine bug in the input since \
819 ASCII does not allow such values. */ \
820 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
824 /* It's an one byte sequence. */ \
825 *((uint32_t *) outptr) = *inptr++; \
826 outptr += sizeof (uint32_t); \
829 #define LOOP_NEED_FLAGS
830 #include <iconv/loop.c>
831 #include <iconv/skeleton.c>
834 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
835 #define DEFINE_INIT 0
836 #define DEFINE_FINI 0
837 #define MIN_NEEDED_FROM 4
838 #define MIN_NEEDED_TO 1
839 #define FROM_DIRECTION 1
840 #define FROM_LOOP internal_ascii_loop
841 #define TO_LOOP internal_ascii_loop /* This is not used. */
842 #define FUNCTION_NAME __gconv_transform_internal_ascii
843 #define ONE_DIRECTION 1
845 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
846 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
847 #define LOOPFCT FROM_LOOP
850 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
852 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
853 STANDARD_TO_LOOP_ERR_HANDLER (4); \
857 /* It's an one byte sequence. */ \
858 *outptr++ = *((const uint32_t *) inptr); \
859 inptr += sizeof (uint32_t); \
862 #define LOOP_NEED_FLAGS
863 #include <iconv/loop.c>
864 #include <iconv/skeleton.c>
867 /* Convert from the internal (UCS4-like) format to UTF-8. */
868 #define DEFINE_INIT 0
869 #define DEFINE_FINI 0
870 #define MIN_NEEDED_FROM 4
871 #define MIN_NEEDED_TO 1
872 #define MAX_NEEDED_TO 6
873 #define FROM_DIRECTION 1
874 #define FROM_LOOP internal_utf8_loop
875 #define TO_LOOP internal_utf8_loop /* This is not used. */
876 #define FUNCTION_NAME __gconv_transform_internal_utf8
877 #define ONE_DIRECTION 1
879 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
880 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
881 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
882 #define LOOPFCT FROM_LOOP
885 uint32_t wc = *((const uint32_t *) inptr); \
887 if (__builtin_expect (wc < 0x80, 1)) \
888 /* It's an one byte sequence. */ \
889 *outptr++ = (unsigned char) wc; \
890 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
893 unsigned char *start; \
895 for (step = 2; step < 6; ++step) \
896 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
899 if (__builtin_expect (outptr + step > outend, 0)) \
902 result = __GCONV_FULL_OUTPUT; \
907 *outptr = (unsigned char) (~0xff >> step); \
911 start[--step] = 0x80 | (wc & 0x3f); \
919 STANDARD_TO_LOOP_ERR_HANDLER (4); \
924 #define LOOP_NEED_FLAGS
925 #include <iconv/loop.c>
926 #include <iconv/skeleton.c>
929 /* Convert from UTF-8 to the internal (UCS4-like) format. */
930 #define DEFINE_INIT 0
931 #define DEFINE_FINI 0
932 #define MIN_NEEDED_FROM 1
933 #define MAX_NEEDED_FROM 6
934 #define MIN_NEEDED_TO 4
935 #define FROM_DIRECTION 1
936 #define FROM_LOOP utf8_internal_loop
937 #define TO_LOOP utf8_internal_loop /* This is not used. */
938 #define FUNCTION_NAME __gconv_transform_utf8_internal
939 #define ONE_DIRECTION 1
941 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
942 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
943 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
944 #define LOOPFCT FROM_LOOP
947 /* Next input byte. */ \
948 uint32_t ch = *inptr; \
950 if (__builtin_expect (ch < 0x80, 1)) \
952 /* One byte sequence. */ \
960 if (ch >= 0xc2 && ch < 0xe0) \
962 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
963 otherwise the wide character could have been represented \
964 using a single byte. */ \
968 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
970 /* We expect three bytes. */ \
974 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
976 /* We expect four bytes. */ \
980 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
982 /* We expect five bytes. */ \
986 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
988 /* We expect six bytes. */ \
994 /* Search the end of this ill-formed UTF-8 character. This \
995 is the next byte with (x & 0xc0) != 0x80. */ \
999 while (inptr + i < inend \
1000 && (*(inptr + i) & 0xc0) == 0x80 \
1004 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1007 if (__builtin_expect (inptr + cnt > inend, 0)) \
1009 /* We don't have enough input. But before we report that check \
1010 that all the bytes are correct. */ \
1011 for (i = 1; inptr + i < inend; ++i) \
1012 if ((inptr[i] & 0xc0) != 0x80) \
1015 if (__builtin_expect (inptr + i == inend, 1)) \
1017 result = __GCONV_INCOMPLETE_INPUT; \
1024 /* Read the possible remaining bytes. */ \
1025 for (i = 1; i < cnt; ++i) \
1027 uint32_t byte = inptr[i]; \
1029 if ((byte & 0xc0) != 0x80) \
1030 /* This is an illegal encoding. */ \
1034 ch |= byte & 0x3f; \
1037 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1038 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1039 have been represented with fewer than cnt bytes. */ \
1040 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1042 /* This is an illegal encoding. */ \
1049 /* Now adjust the pointers and store the result. */ \
1050 *((uint32_t *) outptr) = ch; \
1051 outptr += sizeof (uint32_t); \
1053 #define LOOP_NEED_FLAGS
1055 #define STORE_REST \
1057 /* We store the remaining bytes while converting them into the UCS4 \
1058 format. We can assume that the first byte in the buffer is \
1059 correct and that it requires a larger number of bytes than there \
1060 are in the input buffer. */ \
1061 wint_t ch = **inptrp; \
1064 state->__count = inend - *inptrp; \
1066 if (ch >= 0xc2 && ch < 0xe0) \
1068 /* We expect two bytes. The first byte cannot be 0xc0 or \
1069 0xc1, otherwise the wide character could have been \
1070 represented using a single byte. */ \
1074 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1076 /* We expect three bytes. */ \
1080 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1082 /* We expect four bytes. */ \
1086 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1088 /* We expect five bytes. */ \
1094 /* We expect six bytes. */ \
1099 /* The first byte is already consumed. */ \
1101 while (++(*inptrp) < inend) \
1104 ch |= **inptrp & 0x3f; \
1108 /* Shift for the so far missing bytes. */ \
1111 /* Store the number of bytes expected for the entire sequence. */ \
1112 state->__count |= cnt << 8; \
1114 /* Store the value. */ \
1115 state->__value.__wch = ch; \
1118 #define UNPACK_BYTES \
1120 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1121 wint_t wch = state->__value.__wch; \
1122 size_t ntotal = state->__count >> 8; \
1124 inlen = state->__count & 255; \
1126 bytebuf[0] = inmask[ntotal - 2]; \
1130 if (--ntotal < inlen) \
1131 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1134 while (ntotal > 1); \
1136 bytebuf[0] |= wch; \
1139 #define CLEAR_STATE \
1143 #include <iconv/loop.c>
1144 #include <iconv/skeleton.c>
1147 /* Convert from UCS2 to the internal (UCS4-like) format. */
1148 #define DEFINE_INIT 0
1149 #define DEFINE_FINI 0
1150 #define MIN_NEEDED_FROM 2
1151 #define MIN_NEEDED_TO 4
1152 #define FROM_DIRECTION 1
1153 #define FROM_LOOP ucs2_internal_loop
1154 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1155 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1156 #define ONE_DIRECTION 1
1158 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1159 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1160 #define LOOPFCT FROM_LOOP
1163 uint16_t u1 = get16 (inptr); \
1165 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1167 /* Surrogate characters in UCS-2 input are not valid. Reject \
1168 them. (Catching this here is not security relevant.) */ \
1169 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1172 *((uint32_t *) outptr) = u1; \
1173 outptr += sizeof (uint32_t); \
1176 #define LOOP_NEED_FLAGS
1177 #include <iconv/loop.c>
1178 #include <iconv/skeleton.c>
1181 /* Convert from the internal (UCS4-like) format to UCS2. */
1182 #define DEFINE_INIT 0
1183 #define DEFINE_FINI 0
1184 #define MIN_NEEDED_FROM 4
1185 #define MIN_NEEDED_TO 2
1186 #define FROM_DIRECTION 1
1187 #define FROM_LOOP internal_ucs2_loop
1188 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1189 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1190 #define ONE_DIRECTION 1
1192 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1193 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1194 #define LOOPFCT FROM_LOOP
1197 uint32_t val = *((const uint32_t *) inptr); \
1199 if (__builtin_expect (val >= 0x10000, 0)) \
1201 UNICODE_TAG_HANDLER (val, 4); \
1202 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1204 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1206 /* Surrogate characters in UCS-4 input are not valid. \
1207 We must catch this, because the UCS-2 output might be \
1208 interpreted as UTF-16 by other programs. If we let \
1209 surrogates pass through, attackers could make a security \
1210 hole exploit by synthesizing any desired plane 1-16 \
1212 result = __GCONV_ILLEGAL_INPUT; \
1213 if (! ignore_errors_p ()) \
1221 put16 (outptr, val); \
1222 outptr += sizeof (uint16_t); \
1226 #define LOOP_NEED_FLAGS
1227 #include <iconv/loop.c>
1228 #include <iconv/skeleton.c>
1231 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1232 #define DEFINE_INIT 0
1233 #define DEFINE_FINI 0
1234 #define MIN_NEEDED_FROM 2
1235 #define MIN_NEEDED_TO 4
1236 #define FROM_DIRECTION 1
1237 #define FROM_LOOP ucs2reverse_internal_loop
1238 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1239 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1240 #define ONE_DIRECTION 1
1242 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1243 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1244 #define LOOPFCT FROM_LOOP
1247 uint16_t u1 = bswap_16 (get16 (inptr)); \
1249 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1251 /* Surrogate characters in UCS-2 input are not valid. Reject \
1252 them. (Catching this here is not security relevant.) */ \
1253 if (! ignore_errors_p ()) \
1255 result = __GCONV_ILLEGAL_INPUT; \
1263 *((uint32_t *) outptr) = u1; \
1264 outptr += sizeof (uint32_t); \
1267 #define LOOP_NEED_FLAGS
1268 #include <iconv/loop.c>
1269 #include <iconv/skeleton.c>
1272 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1273 #define DEFINE_INIT 0
1274 #define DEFINE_FINI 0
1275 #define MIN_NEEDED_FROM 4
1276 #define MIN_NEEDED_TO 2
1277 #define FROM_DIRECTION 1
1278 #define FROM_LOOP internal_ucs2reverse_loop
1279 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1280 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1281 #define ONE_DIRECTION 1
1283 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1284 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1285 #define LOOPFCT FROM_LOOP
1288 uint32_t val = *((const uint32_t *) inptr); \
1289 if (__builtin_expect (val >= 0x10000, 0)) \
1291 UNICODE_TAG_HANDLER (val, 4); \
1292 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1294 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1296 /* Surrogate characters in UCS-4 input are not valid. \
1297 We must catch this, because the UCS-2 output might be \
1298 interpreted as UTF-16 by other programs. If we let \
1299 surrogates pass through, attackers could make a security \
1300 hole exploit by synthesizing any desired plane 1-16 \
1302 if (! ignore_errors_p ()) \
1304 result = __GCONV_ILLEGAL_INPUT; \
1313 put16 (outptr, bswap_16 (val)); \
1314 outptr += sizeof (uint16_t); \
1318 #define LOOP_NEED_FLAGS
1319 #include <iconv/loop.c>
1320 #include <iconv/skeleton.c>