1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
30 #include <sys/param.h>
33 # define EILSEQ EINVAL
37 /* Transform from the internal, UCS4-like format, to UCS4. The
38 difference between the internal ucs4 format and the real UCS4
39 format is, if any, the endianness. The Unicode/ISO 10646 standard
40 says that unless some higher protocol specifies it differently, the
41 byte order is big endian. */
/* Settings for the internal -> UCS4 conversion defined below.  The
   minimum sizes needed on the "from" and the "to" side are both 4, and
   the same loop function is named for FROM_LOOP and TO_LOOP (the latter
   is unused, as noted on its line).
   NOTE(review): presumably these are consumed by <iconv/skeleton.c>,
   which is included after the loop functions -- confirm there.  */
44 #define MIN_NEEDED_FROM 4
45 #define MIN_NEEDED_TO 4
46 #define FROM_DIRECTION 1
47 #define FROM_LOOP internal_ucs4_loop
48 #define TO_LOOP internal_ucs4_loop /* This is not used. */
49 #define FUNCTION_NAME __gconv_transform_internal_ucs4
53 internal_ucs4_loop (struct __gconv_step
*step
,
54 struct __gconv_step_data
*step_data
,
55 const unsigned char **inptrp
, const unsigned char *inend
,
56 unsigned char **outptrp
, unsigned char *outend
,
59 const unsigned char *inptr
= *inptrp
;
60 unsigned char *outptr
= *outptrp
;
61 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
64 #if __BYTE_ORDER == __LITTLE_ENDIAN
65 /* Sigh, we have to do some real work. */
68 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
69 *((uint32_t *) outptr
)++ = bswap_32 (*(uint32_t *) inptr
);
73 #elif __BYTE_ORDER == __BIG_ENDIAN
74 /* Simply copy the data. */
75 *inptrp
= inptr
+ n_convert
* 4;
76 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
78 # error "This endianess is not supported."
81 /* Determine the status. */
83 result
= __GCONV_EMPTY_INPUT
;
84 else if (*outptrp
== outend
)
85 result
= __GCONV_FULL_OUTPUT
;
87 result
= __GCONV_INCOMPLETE_INPUT
;
92 #ifndef _STRING_ARCH_unaligned
94 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
95 struct __gconv_step_data
*step_data
,
96 const unsigned char **inptrp
,
97 const unsigned char *inend
,
98 unsigned char **outptrp
, unsigned char *outend
,
101 const unsigned char *inptr
= *inptrp
;
102 unsigned char *outptr
= *outptrp
;
103 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
106 # if __BYTE_ORDER == __LITTLE_ENDIAN
107 /* Sigh, we have to do some real work. */
110 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
112 outptr
[0] = inptr
[3];
113 outptr
[1] = inptr
[2];
114 outptr
[2] = inptr
[1];
115 outptr
[3] = inptr
[0];
120 # elif __BYTE_ORDER == __BIG_ENDIAN
121 /* Simply copy the data. */
122 *inptrp
= inptr
+ n_convert
* 4;
123 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
125 # error "This endianess is not supported."
128 /* Determine the status. */
129 if (*outptrp
== outend
)
130 result
= __GCONV_FULL_OUTPUT
;
131 else if (*inptrp
== inend
)
132 result
= __GCONV_EMPTY_INPUT
;
134 result
= __GCONV_INCOMPLETE_INPUT
;
142 internal_ucs4_loop_single (struct __gconv_step
*step
,
143 struct __gconv_step_data
*step_data
,
144 const unsigned char **inptrp
,
145 const unsigned char *inend
,
146 unsigned char **outptrp
, unsigned char *outend
,
147 size_t *irreversible
)
149 mbstate_t *state
= step_data
->__statep
;
150 size_t cnt
= state
->__count
& 7;
152 while (*inptrp
< inend
&& cnt
< 4)
153 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
155 if (__builtin_expect (cnt
, 4) < 4)
157 /* Still not enough bytes. Store the ones in the input buffer. */
158 state
->__count
&= ~7;
159 state
->__count
|= cnt
;
161 return __GCONV_INCOMPLETE_INPUT
;
164 #if __BYTE_ORDER == __LITTLE_ENDIAN
165 (*outptrp
)[0] = state
->__value
.__wchb
[3];
166 (*outptrp
)[1] = state
->__value
.__wchb
[2];
167 (*outptrp
)[2] = state
->__value
.__wchb
[1];
168 (*outptrp
)[3] = state
->__value
.__wchb
[0];
171 #elif __BYTE_ORDER == __BIG_ENDIAN
173 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
175 # error "This endianess is not supported."
178 /* Clear the state buffer. */
179 state
->__count
&= ~7;
184 #include <iconv/skeleton.c>
187 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
188 for the other direction we have to check for correct values here. */
/* Settings for the UCS4 -> internal conversion defined below.
   DEFINE_INIT and DEFINE_FINI are set to 0, the minimum sizes needed on
   the "from" and the "to" side are both 4, and the same loop function is
   named for FROM_LOOP and TO_LOOP (the latter is unused, as noted on its
   line).
   NOTE(review): presumably these are consumed by <iconv/skeleton.c>,
   which is included after the loop functions -- confirm there.  */
189 #define DEFINE_INIT 0
190 #define DEFINE_FINI 0
191 #define MIN_NEEDED_FROM 4
192 #define MIN_NEEDED_TO 4
193 #define FROM_DIRECTION 1
194 #define FROM_LOOP ucs4_internal_loop
195 #define TO_LOOP ucs4_internal_loop /* This is not used. */
196 #define FUNCTION_NAME __gconv_transform_ucs4_internal
200 ucs4_internal_loop (struct __gconv_step
*step
,
201 struct __gconv_step_data
*step_data
,
202 const unsigned char **inptrp
, const unsigned char *inend
,
203 unsigned char **outptrp
, unsigned char *outend
,
204 size_t *irreversible
)
206 int flags
= step_data
->__flags
;
207 const unsigned char *inptr
= *inptrp
;
208 unsigned char *outptr
= *outptrp
;
209 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
213 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
217 #if __BYTE_ORDER == __LITTLE_ENDIAN
218 inval
= bswap_32 (*(uint32_t *) inptr
);
220 inval
= *(uint32_t *) inptr
;
223 if (__builtin_expect (inval
, 0) > 0x7fffffff)
225 /* The value is too large. We don't try transliteration here since
226 this is not an error because of the lack of possibilities to
227 represent the result. This is a genuine bug in the input since
228 UCS4 does not allow such values. */
229 if (irreversible
== NULL
)
230 /* We are transliterating, don't try to correct anything. */
231 return __GCONV_ILLEGAL_INPUT
;
233 if (flags
& __GCONV_IGNORE_ERRORS
)
235 /* Just ignore this character. */
242 return __GCONV_ILLEGAL_INPUT
;
245 *((uint32_t *) outptr
)++ = inval
;
251 /* Determine the status. */
252 if (*inptrp
== inend
)
253 result
= __GCONV_EMPTY_INPUT
;
254 else if (*outptrp
== outend
)
255 result
= __GCONV_FULL_OUTPUT
;
257 result
= __GCONV_INCOMPLETE_INPUT
;
262 #ifndef _STRING_ARCH_unaligned
264 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
265 struct __gconv_step_data
*step_data
,
266 const unsigned char **inptrp
,
267 const unsigned char *inend
,
268 unsigned char **outptrp
, unsigned char *outend
,
269 size_t *irreversible
)
271 int flags
= step_data
->__flags
;
272 const unsigned char *inptr
= *inptrp
;
273 unsigned char *outptr
= *outptrp
;
274 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
278 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
280 if (__builtin_expect (inptr
[0], 0) > 0x80)
282 /* The value is too large. We don't try transliteration here since
283 this is not an error because of the lack of possibilities to
284 represent the result. This is a genuine bug in the input since
285 UCS4 does not allow such values. */
286 if (irreversible
== NULL
)
287 /* We are transliterating, don't try to correct anything. */
288 return __GCONV_ILLEGAL_INPUT
;
290 if (flags
& __GCONV_IGNORE_ERRORS
)
292 /* Just ignore this character. */
299 return __GCONV_ILLEGAL_INPUT
;
302 # if __BYTE_ORDER == __LITTLE_ENDIAN
303 outptr
[3] = inptr
[0];
304 outptr
[2] = inptr
[1];
305 outptr
[1] = inptr
[2];
306 outptr
[0] = inptr
[3];
308 outptr
[0] = inptr
[0];
309 outptr
[1] = inptr
[1];
310 outptr
[2] = inptr
[2];
311 outptr
[3] = inptr
[3];
319 /* Determine the status. */
320 if (*inptrp
== inend
)
321 result
= __GCONV_EMPTY_INPUT
;
322 else if (*outptrp
== outend
)
323 result
= __GCONV_FULL_OUTPUT
;
325 result
= __GCONV_INCOMPLETE_INPUT
;
/* Finish the one UCS4 character whose bytes are partly stored in the
   conversion state (UCS4 -> internal).

   Bytes are appended to state->__value.__wchb from *inptrp until four
   are present.  If the input ends first, the number of buffered bytes is
   saved in the low three bits of state->__count and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the first buffered
   byte is range-checked and the four bytes are stored at *outptrp:
   reversed on little-endian hosts, in order on big-endian hosts.
   Finally the byte count in the state is cleared.

   NOTE(review): several lines of this function are not visible in this
   excerpt, among them the right-hand side of the range comparison and
   the statements that advance *outptrp and return on success -- check
   them against the complete file.  */
333 ucs4_internal_loop_single (struct __gconv_step
*step
,
334 struct __gconv_step_data
*step_data
,
335 const unsigned char **inptrp
,
336 const unsigned char *inend
,
337 unsigned char **outptrp
, unsigned char *outend
,
338 size_t *irreversible
)
340 mbstate_t *state
= step_data
->__statep
;
341 int flags
= step_data
->__flags
;
/* Number of bytes of the current character already buffered.  */
342 size_t cnt
= state
->__count
& 7;
/* Top the buffer up to four bytes from the new input.  */
344 while (*inptrp
< inend
&& cnt
< 4)
345 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
347 if (__builtin_expect (cnt
, 4) < 4)
349 /* Still not enough bytes. Store the ones in the input buffer. */
350 state
->__count
&= ~7;
351 state
->__count
|= cnt
;
353 return __GCONV_INCOMPLETE_INPUT
;
/* Range check on the first buffered byte.  */
356 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0], 0)
359 /* The value is too large. We don't try transliteration here since
360 this is not an error because of the lack of possibilities to
361 represent the result. This is a genuine bug in the input since
362 UCS4 does not allow such values. */
363 if (!(flags
& __GCONV_IGNORE_ERRORS
))
/* Give back the bytes taken from the current input buffer: cnt is the
   total gathered, (state->__count & 7) the number that were already
   buffered when the function was entered.  */
365 *inptrp
-= cnt
- (state
->__count
& 7);
366 return __GCONV_ILLEGAL_INPUT
;
/* Little-endian hosts: store the big-endian input bytes reversed.  */
371 #if __BYTE_ORDER == __LITTLE_ENDIAN
372 (*outptrp
)[0] = state
->__value
.__wchb
[3];
373 (*outptrp
)[1] = state
->__value
.__wchb
[2];
374 (*outptrp
)[2] = state
->__value
.__wchb
[1];
375 (*outptrp
)[3] = state
->__value
.__wchb
[0];
/* Big-endian hosts: the byte order already matches.  */
376 #elif __BYTE_ORDER == __BIG_ENDIAN
377 (*outptrp
)[0] = state
->__value
.__wchb
[0];
378 (*outptrp
)[1] = state
->__value
.__wchb
[1];
379 (*outptrp
)[2] = state
->__value
.__wchb
[2];
380 (*outptrp
)[3] = state
->__value
.__wchb
[3];
386 /* Clear the state buffer. */
387 state
->__count
&= ~7;
392 #include <iconv/skeleton.c>
395 /* Similarly for the little endian form. */
/* Settings for the internal -> UCS4LE conversion defined below.
   DEFINE_INIT and DEFINE_FINI are set to 0, the minimum sizes needed on
   the "from" and the "to" side are both 4, and the same loop function is
   named for FROM_LOOP and TO_LOOP (the latter is unused, as noted on its
   line).
   NOTE(review): presumably these are consumed by <iconv/skeleton.c>,
   which is included after the loop functions -- confirm there.  */
396 #define DEFINE_INIT 0
397 #define DEFINE_FINI 0
398 #define MIN_NEEDED_FROM 4
399 #define MIN_NEEDED_TO 4
400 #define FROM_DIRECTION 1
401 #define FROM_LOOP internal_ucs4le_loop
402 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
403 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
407 internal_ucs4le_loop (struct __gconv_step
*step
,
408 struct __gconv_step_data
*step_data
,
409 const unsigned char **inptrp
, const unsigned char *inend
,
410 unsigned char **outptrp
, unsigned char *outend
,
411 size_t *irreversible
)
413 const unsigned char *inptr
= *inptrp
;
414 unsigned char *outptr
= *outptrp
;
415 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
418 #if __BYTE_ORDER == __BIG_ENDIAN
419 /* Sigh, we have to do some real work. */
422 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
423 *((uint32_t *) outptr
)++ = bswap_32 (*(uint32_t *) inptr
);
427 #elif __BYTE_ORDER == __LITTLE_ENDIAN
428 /* Simply copy the data. */
429 *inptrp
= inptr
+ n_convert
* 4;
430 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
432 # error "This endianess is not supported."
435 /* Determine the status. */
436 if (*inptrp
== inend
)
437 result
= __GCONV_EMPTY_INPUT
;
438 else if (*outptrp
== outend
)
439 result
= __GCONV_FULL_OUTPUT
;
441 result
= __GCONV_INCOMPLETE_INPUT
;
/* Byte-wise variant of internal_ucs4le_loop, guarded by the
   _STRING_ARCH_unaligned test on the next line.

   n_convert = MIN (input left, output left) / 4 four-byte units are
   handled.  On big-endian hosts the four bytes of every unit are
   reversed using single-byte accesses only; on little-endian hosts the
   data is copied unchanged with __mempcpy.  *inptrp and *outptrp are
   advanced in the copy path shown here.

   The status is __GCONV_EMPTY_INPUT if *inptrp reached inend, else
   __GCONV_FULL_OUTPUT if *outptrp reached outend;
   __GCONV_INCOMPLETE_INPUT is the remaining outcome.

   NOTE(review): several lines (return type, braces, the pointer updates
   after the swap loop, the final return) are not visible in this
   excerpt -- check them against the complete file.  */
446 #ifndef _STRING_ARCH_unaligned
448 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
449 struct __gconv_step_data
*step_data
,
450 const unsigned char **inptrp
,
451 const unsigned char *inend
,
452 unsigned char **outptrp
, unsigned char *outend
,
453 size_t *irreversible
)
455 const unsigned char *inptr
= *inptrp
;
456 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
457 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
460 # if __BYTE_ORDER == __BIG_ENDIAN
461 /* Sigh, we have to do some real work. */
464 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
/* Reverse the byte order of one unit.  */
466 outptr
[0] = inptr
[3];
467 outptr
[1] = inptr
[2];
468 outptr
[2] = inptr
[1];
469 outptr
[3] = inptr
[0];
474 # elif __BYTE_ORDER == __LITTLE_ENDIAN
475 /* Simply copy the data. */
476 *inptrp
= inptr
+ n_convert
* 4;
477 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
479 # error "This endianess is not supported."
482 /* Determine the status. */
483 if (*inptrp
== inend
)
484 result
= __GCONV_EMPTY_INPUT
;
485 else if (*outptrp
== outend
)
486 result
= __GCONV_FULL_OUTPUT
;
488 result
= __GCONV_INCOMPLETE_INPUT
;
496 internal_ucs4le_loop_single (struct __gconv_step
*step
,
497 struct __gconv_step_data
*step_data
,
498 const unsigned char **inptrp
,
499 const unsigned char *inend
,
500 unsigned char **outptrp
, unsigned char *outend
,
501 size_t *irreversible
)
503 mbstate_t *state
= step_data
->__statep
;
504 size_t cnt
= state
->__count
& 7;
506 while (*inptrp
< inend
&& cnt
< 4)
507 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
509 if (__builtin_expect (cnt
, 4) < 4)
511 /* Still not enough bytes. Store the ones in the input buffer. */
512 state
->__count
&= ~7;
513 state
->__count
|= cnt
;
515 return __GCONV_INCOMPLETE_INPUT
;
518 #if __BYTE_ORDER == __BIG_ENDIAN
519 (*outptrp
)[0] = state
->__value
.__wchb
[3];
520 (*outptrp
)[1] = state
->__value
.__wchb
[2];
521 (*outptrp
)[2] = state
->__value
.__wchb
[1];
522 (*outptrp
)[3] = state
->__value
.__wchb
[0];
527 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
530 /* Clear the state buffer. */
531 state
->__count
&= ~7;
536 #include <iconv/skeleton.c>
539 /* And finally from UCS4-LE to the internal encoding. */
/* Settings for the UCS4LE -> internal conversion defined below.
   DEFINE_INIT and DEFINE_FINI are set to 0, the minimum sizes needed on
   the "from" and the "to" side are both 4, and the same loop function is
   named for FROM_LOOP and TO_LOOP (the latter is unused, as noted on its
   line).
   NOTE(review): presumably these are consumed by <iconv/skeleton.c>,
   which is included after the loop functions -- confirm there.  */
540 #define DEFINE_INIT 0
541 #define DEFINE_FINI 0
542 #define MIN_NEEDED_FROM 4
543 #define MIN_NEEDED_TO 4
544 #define FROM_DIRECTION 1
545 #define FROM_LOOP ucs4le_internal_loop
546 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
547 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
551 ucs4le_internal_loop (struct __gconv_step
*step
,
552 struct __gconv_step_data
*step_data
,
553 const unsigned char **inptrp
, const unsigned char *inend
,
554 unsigned char **outptrp
, unsigned char *outend
,
555 size_t *irreversible
)
557 int flags
= step_data
->__flags
;
558 const unsigned char *inptr
= *inptrp
;
559 unsigned char *outptr
= *outptrp
;
560 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
564 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
568 #if __BYTE_ORDER == __BIG_ENDIAN
569 inval
= bswap_32 (*(uint32_t *) inptr
);
571 inval
= *(uint32_t *) inptr
;
574 if (__builtin_expect (inval
, 0) > 0x7fffffff)
576 /* The value is too large. We don't try transliteration here since
577 this is not an error because of the lack of possibilities to
578 represent the result. This is a genuine bug in the input since
579 UCS4 does not allow such values. */
580 if (irreversible
== NULL
)
581 /* We are transliterating, don't try to correct anything. */
582 return __GCONV_ILLEGAL_INPUT
;
584 if (flags
& __GCONV_IGNORE_ERRORS
)
586 /* Just ignore this character. */
591 return __GCONV_ILLEGAL_INPUT
;
594 *((uint32_t *) outptr
)++ = inval
;
600 /* Determine the status. */
601 if (*inptrp
== inend
)
602 result
= __GCONV_EMPTY_INPUT
;
603 else if (*outptrp
== outend
)
604 result
= __GCONV_FULL_OUTPUT
;
606 result
= __GCONV_INCOMPLETE_INPUT
;
611 #ifndef _STRING_ARCH_unaligned
613 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
614 struct __gconv_step_data
*step_data
,
615 const unsigned char **inptrp
,
616 const unsigned char *inend
,
617 unsigned char **outptrp
, unsigned char *outend
,
618 size_t *irreversible
)
620 int flags
= step_data
->__flags
;
621 const unsigned char *inptr
= *inptrp
;
622 unsigned char *outptr
= *outptrp
;
623 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
627 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
629 if (__builtin_expect (inptr
[3], 0) > 0x80)
631 /* The value is too large. We don't try transliteration here since
632 this is not an error because of the lack of possibilities to
633 represent the result. This is a genuine bug in the input since
634 UCS4 does not allow such values. */
635 if (irreversible
== NULL
)
636 /* We are transliterating, don't try to correct anything. */
637 return __GCONV_ILLEGAL_INPUT
;
639 if (flags
& __GCONV_IGNORE_ERRORS
)
641 /* Just ignore this character. */
648 return __GCONV_ILLEGAL_INPUT
;
651 # if __BYTE_ORDER == __BIG_ENDIAN
652 outptr
[3] = inptr
[0];
653 outptr
[2] = inptr
[1];
654 outptr
[1] = inptr
[2];
655 outptr
[0] = inptr
[3];
657 outptr
[0] = inptr
[0];
658 outptr
[1] = inptr
[1];
659 outptr
[2] = inptr
[2];
660 outptr
[3] = inptr
[3];
669 /* Determine the status. */
670 if (*inptrp
== inend
)
671 result
= __GCONV_EMPTY_INPUT
;
672 else if (*outptrp
== outend
)
673 result
= __GCONV_FULL_OUTPUT
;
675 result
= __GCONV_INCOMPLETE_INPUT
;
683 ucs4le_internal_loop_single (struct __gconv_step
*step
,
684 struct __gconv_step_data
*step_data
,
685 const unsigned char **inptrp
,
686 const unsigned char *inend
,
687 unsigned char **outptrp
, unsigned char *outend
,
688 size_t *irreversible
)
690 mbstate_t *state
= step_data
->__statep
;
691 int flags
= step_data
->__flags
;
692 size_t cnt
= state
->__count
& 7;
694 while (*inptrp
< inend
&& cnt
< 4)
695 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
697 if (__builtin_expect (cnt
, 4) < 4)
699 /* Still not enough bytes. Store the ones in the input buffer. */
700 state
->__count
&= ~7;
701 state
->__count
|= cnt
;
703 return __GCONV_INCOMPLETE_INPUT
;
706 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3], 0)
709 /* The value is too large. We don't try transliteration here since
710 this is not an error because of the lack of possibilities to
711 represent the result. This is a genuine bug in the input since
712 UCS4 does not allow such values. */
713 if (!(flags
& __GCONV_IGNORE_ERRORS
))
714 return __GCONV_ILLEGAL_INPUT
;
718 #if __BYTE_ORDER == __BIG_ENDIAN
719 (*outptrp
)[0] = state
->__value
.__wchb
[3];
720 (*outptrp
)[1] = state
->__value
.__wchb
[2];
721 (*outptrp
)[2] = state
->__value
.__wchb
[1];
722 (*outptrp
)[3] = state
->__value
.__wchb
[0];
723 #elif __BYTE_ORDER == __BIG_ENDIAN
724 (*outptrp
)[0] = state
->__value
.__wchb
[0];
725 (*outptrp
)[1] = state
->__value
.__wchb
[1];
726 (*outptrp
)[2] = state
->__value
.__wchb
[2];
727 (*outptrp
)[3] = state
->__value
.__wchb
[3];
733 /* Clear the state buffer. */
734 state
->__count
&= ~7;
739 #include <iconv/skeleton.c>
742 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
/* The minimum input size is 1 byte and the minimum output size 4 bytes.
   The loop body visible below tests each input byte against '\x7f': a
   larger byte sets result to __GCONV_ILLEGAL_INPUT unless
   ignore_errors_p () is true, and any other byte is widened into one
   32-bit output word while both pointers advance.
   NOTE(review): the store "*((uint32_t *) outptr)++" increments the
   result of a cast, which is a GNU cast-as-lvalue extension rather than
   ISO C -- confirm the compiler in use still accepts it.
   NOTE(review): the line that opens the body macro and the handling of
   the ignore-errors case are not visible in this excerpt.  */
743 #define DEFINE_INIT 0
744 #define DEFINE_FINI 0
745 #define MIN_NEEDED_FROM 1
746 #define MIN_NEEDED_TO 4
747 #define FROM_DIRECTION 1
748 #define FROM_LOOP ascii_internal_loop
749 #define TO_LOOP ascii_internal_loop /* This is not used. */
750 #define FUNCTION_NAME __gconv_transform_ascii_internal
751 #define ONE_DIRECTION 1
753 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
754 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
755 #define LOOPFCT FROM_LOOP
758 if (__builtin_expect (*inptr, 0) > '\x7f') \
760 /* The value is too large. We don't try transliteration here since \
761 this is not an error because of the lack of possibilities to \
762 represent the result. This is a genuine bug in the input since \
763 ASCII does not allow such values. */ \
764 if (! ignore_errors_p ()) \
766 /* This is no correct ANSI_X3.4-1968 character. */ \
767 result = __GCONV_ILLEGAL_INPUT; \
775 /* It's an one byte sequence. */ \
776 *((uint32_t *) outptr)++ = *inptr++; \
778 #define LOOP_NEED_FLAGS
779 #include <iconv/loop.c>
780 #include <iconv/skeleton.c>
783 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
/* The minimum input size is 4 bytes and the minimum output size 1 byte.
   The loop body visible below reads the input as one uint32_t: a value
   above 0x7f is handed to STANDARD_ERR_HANDLER (4), any other value is
   stored as a single output byte while the input advances by one 32-bit
   word.
   NOTE(review): both "*((uint32_t *) inptr)++" and the plain uint32_t
   load assume suitably aligned input, and the increment of a cast result
   is a GNU cast-as-lvalue extension rather than ISO C -- confirm the
   compiler in use still accepts it.  */
784 #define DEFINE_INIT 0
785 #define DEFINE_FINI 0
786 #define MIN_NEEDED_FROM 4
787 #define MIN_NEEDED_TO 1
788 #define FROM_DIRECTION 1
789 #define FROM_LOOP internal_ascii_loop
790 #define TO_LOOP internal_ascii_loop /* This is not used. */
791 #define FUNCTION_NAME __gconv_transform_internal_ascii
792 #define ONE_DIRECTION 1
794 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
795 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
796 #define LOOPFCT FROM_LOOP
799 if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \
801 STANDARD_ERR_HANDLER (4); \
804 /* It's an one byte sequence. */ \
805 *outptr++ = *((uint32_t *) inptr)++; \
807 #define LOOP_NEED_FLAGS
808 #include <iconv/loop.c>
809 #include <iconv/skeleton.c>
812 /* Convert from the internal (UCS4-like) format to UTF-8. */
/* The minimum input size is 4 bytes; one character produces between 1
   (MIN_NEEDED_TO) and 6 (MAX_NEEDED_TO) output bytes.

   The loop body visible below reads one uint32_t WC and asserts that it
   is at most 0x7fffffff.  In the single-byte case WC is stored directly.
   Otherwise STEP runs from 2 upward, testing whether WC has no bits set
   above the low 5 * STEP + 1 bits; __GCONV_FULL_OUTPUT is reported if
   STEP bytes do not fit before OUTEND.  The lead byte is
   (unsigned char) (~0xff >> STEP), and trailing bytes are written as
   0x80 | (WC & 0x3f) in a loop that counts STEP down.

   NOTE(review): the condition selecting the single-byte case, the exit
   from the STEP search and the shifting of WC between trailing bytes are
   not visible in this excerpt.  */
813 #define DEFINE_INIT 0
814 #define DEFINE_FINI 0
815 #define MIN_NEEDED_FROM 4
816 #define MIN_NEEDED_TO 1
817 #define MAX_NEEDED_TO 6
818 #define FROM_DIRECTION 1
819 #define FROM_LOOP internal_utf8_loop
820 #define TO_LOOP internal_utf8_loop /* This is not used. */
821 #define FUNCTION_NAME __gconv_transform_internal_utf8
822 #define ONE_DIRECTION 1
824 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
825 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
826 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
827 #define LOOPFCT FROM_LOOP
830 uint32_t wc = *((uint32_t *) inptr); \
832 /* Since we control every character we read this cannot happen. */ \
833 assert (wc <= 0x7fffffff); \
836 /* It's an one byte sequence. */ \
837 *outptr++ = (unsigned char) wc; \
843 for (step = 2; step < 6; ++step) \
844 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
847 if (__builtin_expect (outptr + step > outend, 0)) \
850 result = __GCONV_FULL_OUTPUT; \
855 *outptr = (unsigned char) (~0xff >> step); \
860 start[step] = 0x80 | (wc & 0x3f); \
863 while (--step > 0); \
869 #include <iconv/loop.c>
870 #include <iconv/skeleton.c>
873 /* Convert from UTF-8 to the internal (UCS4-like) format. */
/* One character consumes between 1 (MIN_NEEDED_FROM) and 6
   (MAX_NEEDED_FROM) input bytes and produces 4 output bytes.

   Loop body, as far as it is visible below:
     - The lead byte CH selects the sequence length: 0xc2..0xdf two
       bytes, (CH & 0xf0) == 0xe0 three, (CH & 0xf8) == 0xf0 four,
       (CH & 0xfc) == 0xf8 five, (CH & 0xfe) == 0xfc six.
     - Any other lead byte is an illegal encoding: result becomes
       __GCONV_ILLEGAL_INPUT unless ignore_errors_p () is true, in which
       case following bytes with (x & 0xc0) == 0x80 are skipped while
       SKIPPED stays below 5.
     - If the sequence extends past INEND, the bytes that are available
       are checked first; only if all of them are continuation bytes is
       __GCONV_INCOMPLETE_INPUT reported.
     - The remaining bytes are read and each must satisfy
       (byte & 0xc0) == 0x80.  A sequence is also rejected when CNT > 2
       and (CH >> (5 * CNT - 4)) == 0, i.e. when the value would have
       fitted in fewer bytes.
     - A valid character is stored as one uint32_t.

   STORE_REST records an incomplete sequence in the conversion state:
   the number of bytes present goes to state->__count and the partially
   decoded value to state->__value.__wch.

   UNPACK_BYTES rebuilds the byte sequence from that state, choosing the
   lead byte from the stored value: 0xc0 up to 0x7ff, 0xe0 up to 0xffff,
   0xf0 up to 0x1fffff, 0xf8 up to 0x3ffffff, otherwise 0xfc.

   NOTE(review): many lines of the three macros are missing from this
   excerpt (macro headers, braces, the statements setting CNT and
   shifting CH), so the description above covers only what can be read
   here.  The store "*((uint32_t *) outptr)++" relies on the GNU
   cast-as-lvalue extension.  */
874 #define DEFINE_INIT 0
875 #define DEFINE_FINI 0
876 #define MIN_NEEDED_FROM 1
877 #define MAX_NEEDED_FROM 6
878 #define MIN_NEEDED_TO 4
879 #define FROM_DIRECTION 1
880 #define FROM_LOOP utf8_internal_loop
881 #define TO_LOOP utf8_internal_loop /* This is not used. */
882 #define FUNCTION_NAME __gconv_transform_utf8_internal
883 #define ONE_DIRECTION 1
885 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
886 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
887 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
888 #define LOOPFCT FROM_LOOP
895 /* Next input byte. */ \
900 /* One byte sequence. */ \
906 if (ch >= 0xc2 && ch < 0xe0) \
908 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
909 otherwise the wide character could have been represented \
910 using a single byte. */ \
914 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
916 /* We expect three bytes. */ \
920 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
922 /* We expect four bytes. */ \
926 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
928 /* We expect five bytes. */ \
932 else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc) \
934 /* We expect six bytes. */ \
942 if (! ignore_errors_p ()) \
944 /* This is an illegal encoding. */ \
945 result = __GCONV_ILLEGAL_INPUT; \
949 /* Search the end of this ill-formed UTF-8 character. This \
950 is the next byte with (x & 0xc0) != 0x80. */ \
957 while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
962 if (__builtin_expect (inptr + cnt > inend, 0)) \
964 /* We don't have enough input. But before we report that check \
965 that all the bytes are correct. */ \
966 for (i = 1; inptr + i < inend; ++i) \
967 if ((inptr[i] & 0xc0) != 0x80) \
970 if (__builtin_expect (inptr + i == inend, 1)) \
972 result = __GCONV_INCOMPLETE_INPUT; \
976 if (ignore_errors_p ()) \
984 result = __GCONV_ILLEGAL_INPUT; \
988 /* Read the possible remaining bytes. */ \
989 for (i = 1; i < cnt; ++i) \
991 uint32_t byte = inptr[i]; \
993 if ((byte & 0xc0) != 0x80) \
994 /* This is an illegal encoding. */ \
1001 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1002 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1003 have been represented with fewer than cnt bytes. */ \
1004 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1006 /* This is an illegal encoding. */ \
1007 if (ignore_errors_p ()) \
1014 result = __GCONV_ILLEGAL_INPUT; \
1021 /* Now adjust the pointers and store the result. */ \
1022 *((uint32_t *) outptr)++ = ch; \
1024 #define LOOP_NEED_FLAGS
1026 #define STORE_REST \
1028 /* We store the remaining bytes while converting them into the UCS4 \
1029 format. We can assume that the first byte in the buffer is \
1030 correct and that it requires a larger number of bytes than there \
1031 are in the input buffer. */ \
1032 wint_t ch = **inptrp; \
1035 state->__count = inend - *inptrp; \
1037 if (ch >= 0xc2 && ch < 0xe0) \
1039 /* We expect two bytes. The first byte cannot be 0xc0 or \
1040 0xc1, otherwise the wide character could have been \
1041 represented using a single byte. */ \
1045 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
1047 /* We expect three bytes. */ \
1051 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
1053 /* We expect four bytes. */ \
1057 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
1059 /* We expect five bytes. */ \
1065 /* We expect six bytes. */ \
1070 /* The first byte is already consumed. */ \
1072 while (++(*inptrp) < inend) \
1075 ch |= **inptrp & 0x3f; \
1079 /* Shift for the so far missing bytes. */ \
1082 /* Store the value. */ \
1083 state->__value.__wch = ch; \
1086 #define UNPACK_BYTES \
1088 wint_t wch = state->__value.__wch; \
1090 inlen = state->__count; \
1092 if (state->__value.__wch <= 0x7ff) \
1094 bytebuf[0] = 0xc0; \
1097 else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff) \
1099 bytebuf[0] = 0xe0; \
1102 else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff) \
1104 bytebuf[0] = 0xf0; \
1107 else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff) \
1109 bytebuf[0] = 0xf8; \
1114 bytebuf[0] = 0xfc; \
1120 if (--ntotal < inlen) \
1121 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1124 while (ntotal > 1); \
1126 bytebuf[0] |= wch; \
1129 #include <iconv/loop.c>
1130 #include <iconv/skeleton.c>
1133 /* Convert from UCS2 to the internal (UCS4-like) format. */
/* The minimum input size is 2 bytes and the minimum output size 4 bytes.
   The loop body visible below reads one uint16_t U1 in host byte order.
   Values in 0xd800 <= U1 < 0xe000 (surrogates) set result to
   __GCONV_ILLEGAL_INPUT unless ignore_errors_p () is true; any other
   value is widened into one 32-bit output word.
   NOTE(review): the store "*((uint32_t *) outptr)++" relies on the GNU
   cast-as-lvalue extension, and the handling of the ignore-errors case
   and the input pointer update are not visible in this excerpt.  */
1134 #define DEFINE_INIT 0
1135 #define DEFINE_FINI 0
1136 #define MIN_NEEDED_FROM 2
1137 #define MIN_NEEDED_TO 4
1138 #define FROM_DIRECTION 1
1139 #define FROM_LOOP ucs2_internal_loop
1140 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1141 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1142 #define ONE_DIRECTION 1
1144 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1145 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1146 #define LOOPFCT FROM_LOOP
1149 uint16_t u1 = *((uint16_t *) inptr); \
1151 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1153 /* Surrogate characters in UCS-2 input are not valid. Reject \
1154 them. (Catching this here is not security relevant.) */ \
1155 if (! ignore_errors_p ()) \
1157 result = __GCONV_ILLEGAL_INPUT; \
1165 *((uint32_t *) outptr)++ = u1; \
1168 #define LOOP_NEED_FLAGS
1169 #include <iconv/loop.c>
1170 #include <iconv/skeleton.c>
1173 /* Convert from the internal (UCS4-like) format to UCS2. */
/* The minimum input size is 4 bytes and the minimum output size 2 bytes.
   The loop body visible below reads one uint32_t VAL.  Values of
   0x10000 and above cannot be represented and are handed to
   STANDARD_ERR_HANDLER (4).  Values in 0xd800 <= VAL < 0xe000
   (surrogates) set result to __GCONV_ILLEGAL_INPUT unless
   ignore_errors_p () is true.  Any other value is stored as one
   uint16_t in host byte order.
   NOTE(review): the store "*((uint16_t *) outptr)++" relies on the GNU
   cast-as-lvalue extension, and the handling of the ignore-errors case
   and the input pointer update are not visible in this excerpt.  */
1174 #define DEFINE_INIT 0
1175 #define DEFINE_FINI 0
1176 #define MIN_NEEDED_FROM 4
1177 #define MIN_NEEDED_TO 2
1178 #define FROM_DIRECTION 1
1179 #define FROM_LOOP internal_ucs2_loop
1180 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1181 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1182 #define ONE_DIRECTION 1
1184 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1185 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1186 #define LOOPFCT FROM_LOOP
1189 uint32_t val = *((uint32_t *) inptr); \
1191 if (__builtin_expect (val, 0) >= 0x10000) \
1193 STANDARD_ERR_HANDLER (4); \
1195 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1197 /* Surrogate characters in UCS-4 input are not valid. \
1198 We must catch this, because the UCS-2 output might be \
1199 interpreted as UTF-16 by other programs. If we let \
1200 surrogates pass through, attackers could make a security \
1201 hole exploit by synthesizing any desired plane 1-16 \
1203 if (! ignore_errors_p ()) \
1205 result = __GCONV_ILLEGAL_INPUT; \
1214 *((uint16_t *) outptr)++ = val; \
1218 #define LOOP_NEED_FLAGS
1219 #include <iconv/loop.c>
1220 #include <iconv/skeleton.c>
1223 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
/* The minimum input size is 2 bytes and the minimum output size 4 bytes.
   The loop body visible below reads one uint16_t and byte-swaps it with
   bswap_16 to obtain U1.  Values in 0xd800 <= U1 < 0xe000 (surrogates)
   set result to __GCONV_ILLEGAL_INPUT unless ignore_errors_p () is true;
   any other value is widened into one 32-bit output word.
   NOTE(review): the store "*((uint32_t *) outptr)++" relies on the GNU
   cast-as-lvalue extension, and the handling of the ignore-errors case
   and the input pointer update are not visible in this excerpt.  */
1224 #define DEFINE_INIT 0
1225 #define DEFINE_FINI 0
1226 #define MIN_NEEDED_FROM 2
1227 #define MIN_NEEDED_TO 4
1228 #define FROM_DIRECTION 1
1229 #define FROM_LOOP ucs2reverse_internal_loop
1230 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1231 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1232 #define ONE_DIRECTION 1
1234 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1235 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1236 #define LOOPFCT FROM_LOOP
1239 uint16_t u1 = bswap_16 (*((uint16_t *) inptr)); \
1241 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1243 /* Surrogate characters in UCS-2 input are not valid. Reject \
1244 them. (Catching this here is not security relevant.) */ \
1245 if (! ignore_errors_p ()) \
1247 result = __GCONV_ILLEGAL_INPUT; \
1255 *((uint32_t *) outptr)++ = u1; \
1258 #define LOOP_NEED_FLAGS
1259 #include <iconv/loop.c>
1260 #include <iconv/skeleton.c>
1263 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
/* The minimum input size is 4 bytes and the minimum output size 2 bytes.
   The loop body visible below reads one uint32_t VAL.  Values of
   0x10000 and above cannot be represented and are handed to
   STANDARD_ERR_HANDLER (4).  Values in 0xd800 <= VAL < 0xe000
   (surrogates) set result to __GCONV_ILLEGAL_INPUT unless
   ignore_errors_p () is true.  Any other value is byte-swapped with
   bswap_16 and stored as one uint16_t.
   NOTE(review): the store "*((uint16_t *) outptr)++" relies on the GNU
   cast-as-lvalue extension, and the handling of the ignore-errors case
   and the input pointer update are not visible in this excerpt.  */
1264 #define DEFINE_INIT 0
1265 #define DEFINE_FINI 0
1266 #define MIN_NEEDED_FROM 4
1267 #define MIN_NEEDED_TO 2
1268 #define FROM_DIRECTION 1
1269 #define FROM_LOOP internal_ucs2reverse_loop
1270 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1271 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1272 #define ONE_DIRECTION 1
1274 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1275 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1276 #define LOOPFCT FROM_LOOP
1279 uint32_t val = *((uint32_t *) inptr); \
1280 if (__builtin_expect (val, 0) >= 0x10000) \
1282 STANDARD_ERR_HANDLER (4); \
1284 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1286 /* Surrogate characters in UCS-4 input are not valid. \
1287 We must catch this, because the UCS-2 output might be \
1288 interpreted as UTF-16 by other programs. If we let \
1289 surrogates pass through, attackers could make a security \
1290 hole exploit by synthesizing any desired plane 1-16 \
1292 if (! ignore_errors_p ()) \
1294 result = __GCONV_ILLEGAL_INPUT; \
1303 *((uint16_t *) outptr)++ = bswap_16 (val); \
1307 #define LOOP_NEED_FLAGS
1308 #include <iconv/loop.c>
1309 #include <iconv/skeleton.c>