1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
30 #include <sys/param.h>
33 # define EILSEQ EINVAL
37 /* These are definitions used by some of the functions for handling
38 UTF-8 encoding below. */
/* encoding_mask[N] has every bit set that must be clear in a character
   value for it to fit into a UTF-8 sequence of N + 2 bytes.  */
static const uint32_t encoding_mask[] =
{
  ~UINT32_C (0x7ff), ~UINT32_C (0xffff), ~UINT32_C (0x1fffff),
  ~UINT32_C (0x3ffffff)
};

/* encoding_byte[N] is the marker stored in the first byte of a UTF-8
   sequence of N + 2 bytes.  */
static const unsigned char encoding_byte[] =
{
  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};
/* Transform from the internal, UCS4-like format, to UCS4.  The only
   difference between the internal UCS4 format and the real UCS4
   format is, if any, the endianness.  Unicode/ISO 10646 says that,
   unless some higher-level protocol specifies otherwise, the byte
   order is big endian.  */
57 #define MIN_NEEDED_FROM 4
58 #define MIN_NEEDED_TO 4
59 #define FROM_DIRECTION 1
60 #define FROM_LOOP internal_ucs4_loop
61 #define TO_LOOP internal_ucs4_loop /* This is not used. */
62 #define FUNCTION_NAME __gconv_transform_internal_ucs4
66 internal_ucs4_loop (struct __gconv_step
*step
,
67 struct __gconv_step_data
*step_data
,
68 const unsigned char **inptrp
, const unsigned char *inend
,
69 unsigned char **outptrp
, unsigned char *outend
,
72 const unsigned char *inptr
= *inptrp
;
73 unsigned char *outptr
= *outptrp
;
74 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
77 #if __BYTE_ORDER == __LITTLE_ENDIAN
78 /* Sigh, we have to do some real work. */
81 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
82 *((uint32_t *) outptr
)++ = bswap_32 (*(uint32_t *) inptr
);
86 #elif __BYTE_ORDER == __BIG_ENDIAN
87 /* Simply copy the data. */
88 *inptrp
= inptr
+ n_convert
* 4;
89 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
91 # error "This endianess is not supported."
94 /* Determine the status. */
96 result
= __GCONV_EMPTY_INPUT
;
97 else if (*outptrp
== outend
)
98 result
= __GCONV_FULL_OUTPUT
;
100 result
= __GCONV_INCOMPLETE_INPUT
;
105 #ifndef _STRING_ARCH_unaligned
107 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
108 struct __gconv_step_data
*step_data
,
109 const unsigned char **inptrp
,
110 const unsigned char *inend
,
111 unsigned char **outptrp
, unsigned char *outend
,
112 size_t *irreversible
)
114 const unsigned char *inptr
= *inptrp
;
115 unsigned char *outptr
= *outptrp
;
116 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
119 # if __BYTE_ORDER == __LITTLE_ENDIAN
120 /* Sigh, we have to do some real work. */
123 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
125 outptr
[0] = inptr
[3];
126 outptr
[1] = inptr
[2];
127 outptr
[2] = inptr
[1];
128 outptr
[3] = inptr
[0];
133 # elif __BYTE_ORDER == __BIG_ENDIAN
134 /* Simply copy the data. */
135 *inptrp
= inptr
+ n_convert
* 4;
136 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
138 # error "This endianess is not supported."
141 /* Determine the status. */
142 if (*outptrp
== outend
)
143 result
= __GCONV_FULL_OUTPUT
;
144 else if (*inptrp
== inend
)
145 result
= __GCONV_EMPTY_INPUT
;
147 result
= __GCONV_INCOMPLETE_INPUT
;
155 internal_ucs4_loop_single (struct __gconv_step
*step
,
156 struct __gconv_step_data
*step_data
,
157 const unsigned char **inptrp
,
158 const unsigned char *inend
,
159 unsigned char **outptrp
, unsigned char *outend
,
160 size_t *irreversible
)
162 mbstate_t *state
= step_data
->__statep
;
163 size_t cnt
= state
->__count
& 7;
165 while (*inptrp
< inend
&& cnt
< 4)
166 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
168 if (__builtin_expect (cnt
, 4) < 4)
170 /* Still not enough bytes. Store the ones in the input buffer. */
171 state
->__count
&= ~7;
172 state
->__count
|= cnt
;
174 return __GCONV_INCOMPLETE_INPUT
;
177 #if __BYTE_ORDER == __LITTLE_ENDIAN
178 (*outptrp
)[0] = state
->__value
.__wchb
[3];
179 (*outptrp
)[1] = state
->__value
.__wchb
[2];
180 (*outptrp
)[2] = state
->__value
.__wchb
[1];
181 (*outptrp
)[3] = state
->__value
.__wchb
[0];
184 #elif __BYTE_ORDER == __BIG_ENDIAN
186 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
188 # error "This endianess is not supported."
191 /* Clear the state buffer. */
192 state
->__count
&= ~7;
197 #include <iconv/skeleton.c>
200 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
201 for the other direction we have to check for correct values here. */
202 #define DEFINE_INIT 0
203 #define DEFINE_FINI 0
204 #define MIN_NEEDED_FROM 4
205 #define MIN_NEEDED_TO 4
206 #define FROM_DIRECTION 1
207 #define FROM_LOOP ucs4_internal_loop
208 #define TO_LOOP ucs4_internal_loop /* This is not used. */
209 #define FUNCTION_NAME __gconv_transform_ucs4_internal
213 ucs4_internal_loop (struct __gconv_step
*step
,
214 struct __gconv_step_data
*step_data
,
215 const unsigned char **inptrp
, const unsigned char *inend
,
216 unsigned char **outptrp
, unsigned char *outend
,
217 size_t *irreversible
)
219 int flags
= step_data
->__flags
;
220 const unsigned char *inptr
= *inptrp
;
221 unsigned char *outptr
= *outptrp
;
222 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
226 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
230 #if __BYTE_ORDER == __LITTLE_ENDIAN
231 inval
= bswap_32 (*(uint32_t *) inptr
);
233 inval
= *(uint32_t *) inptr
;
236 if (__builtin_expect (inval
, 0) > 0x7fffffff)
238 /* The value is too large. We don't try transliteration here since
239 this is not an error because of the lack of possibilities to
240 represent the result. This is a genuine bug in the input since
241 UCS4 does not allow such values. */
242 if (flags
& __GCONV_IGNORE_ERRORS
)
244 /* Just ignore this character. */
251 return __GCONV_ILLEGAL_INPUT
;
254 *((uint32_t *) outptr
)++ = inval
;
260 /* Determine the status. */
261 if (*inptrp
== inend
)
262 result
= __GCONV_EMPTY_INPUT
;
263 else if (*outptrp
== outend
)
264 result
= __GCONV_FULL_OUTPUT
;
266 result
= __GCONV_INCOMPLETE_INPUT
;
271 #ifndef _STRING_ARCH_unaligned
273 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
274 struct __gconv_step_data
*step_data
,
275 const unsigned char **inptrp
,
276 const unsigned char *inend
,
277 unsigned char **outptrp
, unsigned char *outend
,
278 size_t *irreversible
)
280 int flags
= step_data
->__flags
;
281 const unsigned char *inptr
= *inptrp
;
282 unsigned char *outptr
= *outptrp
;
283 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
287 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
289 if (__builtin_expect (inptr
[0], 0) > 0x80)
291 /* The value is too large. We don't try transliteration here since
292 this is not an error because of the lack of possibilities to
293 represent the result. This is a genuine bug in the input since
294 UCS4 does not allow such values. */
295 if (flags
& __GCONV_IGNORE_ERRORS
)
297 /* Just ignore this character. */
304 return __GCONV_ILLEGAL_INPUT
;
307 # if __BYTE_ORDER == __LITTLE_ENDIAN
308 outptr
[3] = inptr
[0];
309 outptr
[2] = inptr
[1];
310 outptr
[1] = inptr
[2];
311 outptr
[0] = inptr
[3];
313 outptr
[0] = inptr
[0];
314 outptr
[1] = inptr
[1];
315 outptr
[2] = inptr
[2];
316 outptr
[3] = inptr
[3];
324 /* Determine the status. */
325 if (*inptrp
== inend
)
326 result
= __GCONV_EMPTY_INPUT
;
327 else if (*outptrp
== outend
)
328 result
= __GCONV_FULL_OUTPUT
;
330 result
= __GCONV_INCOMPLETE_INPUT
;
338 ucs4_internal_loop_single (struct __gconv_step
*step
,
339 struct __gconv_step_data
*step_data
,
340 const unsigned char **inptrp
,
341 const unsigned char *inend
,
342 unsigned char **outptrp
, unsigned char *outend
,
343 size_t *irreversible
)
345 mbstate_t *state
= step_data
->__statep
;
346 int flags
= step_data
->__flags
;
347 size_t cnt
= state
->__count
& 7;
349 while (*inptrp
< inend
&& cnt
< 4)
350 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
352 if (__builtin_expect (cnt
, 4) < 4)
354 /* Still not enough bytes. Store the ones in the input buffer. */
355 state
->__count
&= ~7;
356 state
->__count
|= cnt
;
358 return __GCONV_INCOMPLETE_INPUT
;
361 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0], 0)
364 /* The value is too large. We don't try transliteration here since
365 this is not an error because of the lack of possibilities to
366 represent the result. This is a genuine bug in the input since
367 UCS4 does not allow such values. */
368 if (!(flags
& __GCONV_IGNORE_ERRORS
))
370 *inptrp
-= cnt
- (state
->__count
& 7);
371 return __GCONV_ILLEGAL_INPUT
;
376 #if __BYTE_ORDER == __LITTLE_ENDIAN
377 (*outptrp
)[0] = state
->__value
.__wchb
[3];
378 (*outptrp
)[1] = state
->__value
.__wchb
[2];
379 (*outptrp
)[2] = state
->__value
.__wchb
[1];
380 (*outptrp
)[3] = state
->__value
.__wchb
[0];
381 #elif __BYTE_ORDER == __BIG_ENDIAN
382 (*outptrp
)[0] = state
->__value
.__wchb
[0];
383 (*outptrp
)[1] = state
->__value
.__wchb
[1];
384 (*outptrp
)[2] = state
->__value
.__wchb
[2];
385 (*outptrp
)[3] = state
->__value
.__wchb
[3];
391 /* Clear the state buffer. */
392 state
->__count
&= ~7;
397 #include <iconv/skeleton.c>
400 /* Similarly for the little endian form. */
401 #define DEFINE_INIT 0
402 #define DEFINE_FINI 0
403 #define MIN_NEEDED_FROM 4
404 #define MIN_NEEDED_TO 4
405 #define FROM_DIRECTION 1
406 #define FROM_LOOP internal_ucs4le_loop
407 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
408 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
412 internal_ucs4le_loop (struct __gconv_step
*step
,
413 struct __gconv_step_data
*step_data
,
414 const unsigned char **inptrp
, const unsigned char *inend
,
415 unsigned char **outptrp
, unsigned char *outend
,
416 size_t *irreversible
)
418 const unsigned char *inptr
= *inptrp
;
419 unsigned char *outptr
= *outptrp
;
420 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
423 #if __BYTE_ORDER == __BIG_ENDIAN
424 /* Sigh, we have to do some real work. */
427 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
428 *((uint32_t *) outptr
)++ = bswap_32 (*(uint32_t *) inptr
);
432 #elif __BYTE_ORDER == __LITTLE_ENDIAN
433 /* Simply copy the data. */
434 *inptrp
= inptr
+ n_convert
* 4;
435 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
437 # error "This endianess is not supported."
440 /* Determine the status. */
441 if (*inptrp
== inend
)
442 result
= __GCONV_EMPTY_INPUT
;
443 else if (*outptrp
== outend
)
444 result
= __GCONV_FULL_OUTPUT
;
446 result
= __GCONV_INCOMPLETE_INPUT
;
451 #ifndef _STRING_ARCH_unaligned
453 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
454 struct __gconv_step_data
*step_data
,
455 const unsigned char **inptrp
,
456 const unsigned char *inend
,
457 unsigned char **outptrp
, unsigned char *outend
,
458 size_t *irreversible
)
460 const unsigned char *inptr
= *inptrp
;
461 unsigned char *outptr
= *outptrp
;
462 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
465 # if __BYTE_ORDER == __BIG_ENDIAN
466 /* Sigh, we have to do some real work. */
469 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
471 outptr
[0] = inptr
[3];
472 outptr
[1] = inptr
[2];
473 outptr
[2] = inptr
[1];
474 outptr
[3] = inptr
[0];
479 # elif __BYTE_ORDER == __LITTLE_ENDIAN
480 /* Simply copy the data. */
481 *inptrp
= inptr
+ n_convert
* 4;
482 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
484 # error "This endianess is not supported."
487 /* Determine the status. */
488 if (*inptrp
== inend
)
489 result
= __GCONV_EMPTY_INPUT
;
490 else if (*outptrp
== outend
)
491 result
= __GCONV_FULL_OUTPUT
;
493 result
= __GCONV_INCOMPLETE_INPUT
;
501 internal_ucs4le_loop_single (struct __gconv_step
*step
,
502 struct __gconv_step_data
*step_data
,
503 const unsigned char **inptrp
,
504 const unsigned char *inend
,
505 unsigned char **outptrp
, unsigned char *outend
,
506 size_t *irreversible
)
508 mbstate_t *state
= step_data
->__statep
;
509 size_t cnt
= state
->__count
& 7;
511 while (*inptrp
< inend
&& cnt
< 4)
512 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
514 if (__builtin_expect (cnt
, 4) < 4)
516 /* Still not enough bytes. Store the ones in the input buffer. */
517 state
->__count
&= ~7;
518 state
->__count
|= cnt
;
520 return __GCONV_INCOMPLETE_INPUT
;
523 #if __BYTE_ORDER == __BIG_ENDIAN
524 (*outptrp
)[0] = state
->__value
.__wchb
[3];
525 (*outptrp
)[1] = state
->__value
.__wchb
[2];
526 (*outptrp
)[2] = state
->__value
.__wchb
[1];
527 (*outptrp
)[3] = state
->__value
.__wchb
[0];
532 *(*((uint32_t **) outptrp
)++) = state
->__value
.__wch
;
535 /* Clear the state buffer. */
536 state
->__count
&= ~7;
541 #include <iconv/skeleton.c>
544 /* And finally from UCS4-LE to the internal encoding. */
545 #define DEFINE_INIT 0
546 #define DEFINE_FINI 0
547 #define MIN_NEEDED_FROM 4
548 #define MIN_NEEDED_TO 4
549 #define FROM_DIRECTION 1
550 #define FROM_LOOP ucs4le_internal_loop
551 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
552 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
556 ucs4le_internal_loop (struct __gconv_step
*step
,
557 struct __gconv_step_data
*step_data
,
558 const unsigned char **inptrp
, const unsigned char *inend
,
559 unsigned char **outptrp
, unsigned char *outend
,
560 size_t *irreversible
)
562 int flags
= step_data
->__flags
;
563 const unsigned char *inptr
= *inptrp
;
564 unsigned char *outptr
= *outptrp
;
565 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
569 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
573 #if __BYTE_ORDER == __BIG_ENDIAN
574 inval
= bswap_32 (*(uint32_t *) inptr
);
576 inval
= *(uint32_t *) inptr
;
579 if (__builtin_expect (inval
, 0) > 0x7fffffff)
581 /* The value is too large. We don't try transliteration here since
582 this is not an error because of the lack of possibilities to
583 represent the result. This is a genuine bug in the input since
584 UCS4 does not allow such values. */
585 if (flags
& __GCONV_IGNORE_ERRORS
)
587 /* Just ignore this character. */
592 return __GCONV_ILLEGAL_INPUT
;
595 *((uint32_t *) outptr
)++ = inval
;
601 /* Determine the status. */
602 if (*inptrp
== inend
)
603 result
= __GCONV_EMPTY_INPUT
;
604 else if (*outptrp
== outend
)
605 result
= __GCONV_FULL_OUTPUT
;
607 result
= __GCONV_INCOMPLETE_INPUT
;
612 #ifndef _STRING_ARCH_unaligned
614 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
615 struct __gconv_step_data
*step_data
,
616 const unsigned char **inptrp
,
617 const unsigned char *inend
,
618 unsigned char **outptrp
, unsigned char *outend
,
619 size_t *irreversible
)
621 int flags
= step_data
->__flags
;
622 const unsigned char *inptr
= *inptrp
;
623 unsigned char *outptr
= *outptrp
;
624 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
628 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
630 if (__builtin_expect (inptr
[3], 0) > 0x80)
632 /* The value is too large. We don't try transliteration here since
633 this is not an error because of the lack of possibilities to
634 represent the result. This is a genuine bug in the input since
635 UCS4 does not allow such values. */
636 if (flags
& __GCONV_IGNORE_ERRORS
)
638 /* Just ignore this character. */
645 return __GCONV_ILLEGAL_INPUT
;
648 # if __BYTE_ORDER == __BIG_ENDIAN
649 outptr
[3] = inptr
[0];
650 outptr
[2] = inptr
[1];
651 outptr
[1] = inptr
[2];
652 outptr
[0] = inptr
[3];
654 outptr
[0] = inptr
[0];
655 outptr
[1] = inptr
[1];
656 outptr
[2] = inptr
[2];
657 outptr
[3] = inptr
[3];
666 /* Determine the status. */
667 if (*inptrp
== inend
)
668 result
= __GCONV_EMPTY_INPUT
;
669 else if (*outptrp
== outend
)
670 result
= __GCONV_FULL_OUTPUT
;
672 result
= __GCONV_INCOMPLETE_INPUT
;
680 ucs4le_internal_loop_single (struct __gconv_step
*step
,
681 struct __gconv_step_data
*step_data
,
682 const unsigned char **inptrp
,
683 const unsigned char *inend
,
684 unsigned char **outptrp
, unsigned char *outend
,
685 size_t *irreversible
)
687 mbstate_t *state
= step_data
->__statep
;
688 int flags
= step_data
->__flags
;
689 size_t cnt
= state
->__count
& 7;
691 while (*inptrp
< inend
&& cnt
< 4)
692 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
694 if (__builtin_expect (cnt
, 4) < 4)
696 /* Still not enough bytes. Store the ones in the input buffer. */
697 state
->__count
&= ~7;
698 state
->__count
|= cnt
;
700 return __GCONV_INCOMPLETE_INPUT
;
703 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3], 0)
706 /* The value is too large. We don't try transliteration here since
707 this is not an error because of the lack of possibilities to
708 represent the result. This is a genuine bug in the input since
709 UCS4 does not allow such values. */
710 if (!(flags
& __GCONV_IGNORE_ERRORS
))
711 return __GCONV_ILLEGAL_INPUT
;
715 #if __BYTE_ORDER == __BIG_ENDIAN
716 (*outptrp
)[0] = state
->__value
.__wchb
[3];
717 (*outptrp
)[1] = state
->__value
.__wchb
[2];
718 (*outptrp
)[2] = state
->__value
.__wchb
[1];
719 (*outptrp
)[3] = state
->__value
.__wchb
[0];
720 #elif __BYTE_ORDER == __BIG_ENDIAN
721 (*outptrp
)[0] = state
->__value
.__wchb
[0];
722 (*outptrp
)[1] = state
->__value
.__wchb
[1];
723 (*outptrp
)[2] = state
->__value
.__wchb
[2];
724 (*outptrp
)[3] = state
->__value
.__wchb
[3];
730 /* Clear the state buffer. */
731 state
->__count
&= ~7;
736 #include <iconv/skeleton.c>
739 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
740 #define DEFINE_INIT 0
741 #define DEFINE_FINI 0
742 #define MIN_NEEDED_FROM 1
743 #define MIN_NEEDED_TO 4
744 #define FROM_DIRECTION 1
745 #define FROM_LOOP ascii_internal_loop
746 #define TO_LOOP ascii_internal_loop /* This is not used. */
747 #define FUNCTION_NAME __gconv_transform_ascii_internal
748 #define ONE_DIRECTION 1
750 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
751 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
752 #define LOOPFCT FROM_LOOP
755 if (__builtin_expect (*inptr, 0) > '\x7f') \
757 /* The value is too large. We don't try transliteration here since \
758 this is not an error because of the lack of possibilities to \
759 represent the result. This is a genuine bug in the input since \
760 ASCII does not allow such values. */ \
761 if (! ignore_errors_p ()) \
763 /* This is no correct ANSI_X3.4-1968 character. */ \
764 result = __GCONV_ILLEGAL_INPUT; \
772 /* It's an one byte sequence. */ \
773 /* XXX unaligned. */ \
774 *((uint32_t *) outptr)++ = *inptr++; \
776 #define LOOP_NEED_FLAGS
777 #include <iconv/loop.c>
778 #include <iconv/skeleton.c>
781 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
782 #define DEFINE_INIT 0
783 #define DEFINE_FINI 0
784 #define MIN_NEEDED_FROM 4
785 #define MIN_NEEDED_TO 1
786 #define FROM_DIRECTION 1
787 #define FROM_LOOP internal_ascii_loop
788 #define TO_LOOP internal_ascii_loop /* This is not used. */
789 #define FUNCTION_NAME __gconv_transform_internal_ascii
790 #define ONE_DIRECTION 1
792 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
793 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
794 #define LOOPFCT FROM_LOOP
797 /* XXX unaligned. */ \
798 if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \
800 STANDARD_ERR_HANDLER (4); \
803 /* It's an one byte sequence. */ \
804 *outptr++ = *((uint32_t *) inptr)++; \
806 #define LOOP_NEED_FLAGS
807 #include <iconv/loop.c>
808 #include <iconv/skeleton.c>
811 /* Convert from the internal (UCS4-like) format to UTF-8. */
812 #define DEFINE_INIT 0
813 #define DEFINE_FINI 0
814 #define MIN_NEEDED_FROM 4
815 #define MIN_NEEDED_TO 1
816 #define MAX_NEEDED_TO 6
817 #define FROM_DIRECTION 1
818 #define FROM_LOOP internal_utf8_loop
819 #define TO_LOOP internal_utf8_loop /* This is not used. */
820 #define FUNCTION_NAME __gconv_transform_internal_utf8
821 #define ONE_DIRECTION 1
823 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
824 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
825 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
826 #define LOOPFCT FROM_LOOP
829 uint32_t wc = *((uint32_t *) inptr); \
831 /* Since we control every character we read this cannot happen. */ \
832 assert (wc <= 0x7fffffff); \
835 /* It's an one byte sequence. */ \
836 *outptr++ = (unsigned char) wc; \
842 for (step = 2; step < 6; ++step) \
843 if ((wc & encoding_mask[step - 2]) == 0) \
846 if (__builtin_expect (outptr + step > outend, 0)) \
849 result = __GCONV_FULL_OUTPUT; \
854 *outptr = encoding_byte[step - 2]; \
859 start[step] = 0x80 | (wc & 0x3f); \
862 while (--step > 0); \
868 #include <iconv/loop.c>
869 #include <iconv/skeleton.c>
872 /* Convert from UTF-8 to the internal (UCS4-like) format. */
873 #define DEFINE_INIT 0
874 #define DEFINE_FINI 0
875 #define MIN_NEEDED_FROM 1
876 #define MAX_NEEDED_FROM 6
877 #define MIN_NEEDED_TO 4
878 #define FROM_DIRECTION 1
879 #define FROM_LOOP utf8_internal_loop
880 #define TO_LOOP utf8_internal_loop /* This is not used. */
881 #define FUNCTION_NAME __gconv_transform_utf8_internal
882 #define ONE_DIRECTION 1
884 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
885 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
886 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
887 #define LOOPFCT FROM_LOOP
894 /* Next input byte. */ \
899 /* One byte sequence. */ \
905 if (ch >= 0xc2 && ch < 0xe0) \
907 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
908 otherwise the wide character could have been represented \
909 using a single byte. */ \
913 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
915 /* We expect three bytes. */ \
919 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
921 /* We expect four bytes. */ \
925 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
927 /* We expect five bytes. */ \
931 else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc) \
933 /* We expect six bytes. */ \
941 if (! ignore_errors_p ()) \
943 /* This is an illegal encoding. */ \
944 result = __GCONV_ILLEGAL_INPUT; \
948 /* Search the end of this ill-formed UTF-8 character. This \
949 is the next byte with (x & 0xc0) != 0x80. */ \
956 while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
961 if (__builtin_expect (inptr + cnt > inend, 0)) \
963 /* We don't have enough input. But before we report that check \
964 that all the bytes are correct. */ \
965 for (i = 1; inptr + i < inend; ++i) \
966 if ((inptr[i] & 0xc0) != 0x80) \
969 if (__builtin_expect (inptr + i == inend, 1)) \
971 result = __GCONV_INCOMPLETE_INPUT; \
975 /* This is an illegal character. */ \
976 if (ignore_errors_p ()) \
984 result = __GCONV_ILLEGAL_INPUT; \
988 /* Read the possible remaining bytes. */ \
989 for (i = 1; i < cnt; ++i) \
991 uint32_t byte = inptr[i]; \
993 if ((byte & 0xc0) != 0x80) \
994 /* This is an illegal encoding. */ \
1001 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1002 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1003 have been represented with fewer than cnt bytes. */ \
1004 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1006 /* This is an illegal encoding. */ \
1007 if (ignore_errors_p ()) \
1014 result = __GCONV_ILLEGAL_INPUT; \
1021 /* Now adjust the pointers and store the result. */ \
1022 *((uint32_t *) outptr)++ = ch; \
1024 #define LOOP_NEED_FLAGS
1026 #define STORE_REST \
1028 /* We store the remaining bytes while converting them into the UCS4 \
1029 format. We can assume that the first byte in the buffer is \
1030 correct and that it requires a larger number of bytes than there \
1031 are in the input buffer. */ \
1032 wint_t ch = **inptrp; \
1035 state->__count = inend - *inptrp; \
1037 if (ch >= 0xc2 && ch < 0xe0) \
1039 /* We expect two bytes. The first byte cannot be 0xc0 or \
1040 0xc1, otherwise the wide character could have been \
1041 represented using a single byte. */ \
1045 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
1047 /* We expect three bytes. */ \
1051 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
1053 /* We expect four bytes. */ \
1057 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
1059 /* We expect five bytes. */ \
1065 /* We expect six bytes. */ \
1070 /* The first byte is already consumed. */ \
1072 while (++(*inptrp) < inend) \
1075 ch |= **inptrp & 0x3f; \
1079 /* Shift for the so far missing bytes. */ \
1082 /* Store the value. */ \
1083 state->__value.__wch = ch; \
1086 #define UNPACK_BYTES \
1088 wint_t wch = state->__value.__wch; \
1090 inlen = state->__count; \
1092 if (state->__value.__wch <= 0x7ff) \
1094 bytebuf[0] = 0xc0; \
1097 else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff) \
1099 bytebuf[0] = 0xe0; \
1102 else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff) \
1104 bytebuf[0] = 0xf0; \
1107 else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff) \
1109 bytebuf[0] = 0xf8; \
1114 bytebuf[0] = 0xfc; \
1120 if (--ntotal < inlen) \
1121 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1124 while (ntotal > 1); \
1126 bytebuf[0] |= wch; \
1129 #include <iconv/loop.c>
1130 #include <iconv/skeleton.c>
1133 /* Convert from UCS2 to the internal (UCS4-like) format. */
1134 #define DEFINE_INIT 0
1135 #define DEFINE_FINI 0
1136 #define MIN_NEEDED_FROM 2
1137 #define MIN_NEEDED_TO 4
1138 #define FROM_DIRECTION 1
1139 #define FROM_LOOP ucs2_internal_loop
1140 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1141 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1142 #define ONE_DIRECTION 1
1144 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1145 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1146 #define LOOPFCT FROM_LOOP
1148 *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
1149 #include <iconv/loop.c>
1150 #include <iconv/skeleton.c>
1153 /* Convert from the internal (UCS4-like) format to UCS2. */
1154 #define DEFINE_INIT 0
1155 #define DEFINE_FINI 0
1156 #define MIN_NEEDED_FROM 4
1157 #define MIN_NEEDED_TO 2
1158 #define FROM_DIRECTION 1
1159 #define FROM_LOOP internal_ucs2_loop
1160 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1161 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1162 #define ONE_DIRECTION 1
1164 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1165 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1166 #define LOOPFCT FROM_LOOP
1169 if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000) \
1171 STANDARD_ERR_HANDLER (4); \
1174 *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
1176 #define LOOP_NEED_FLAGS
1177 #include <iconv/loop.c>
1178 #include <iconv/skeleton.c>
1181 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1182 #define DEFINE_INIT 0
1183 #define DEFINE_FINI 0
1184 #define MIN_NEEDED_FROM 2
1185 #define MIN_NEEDED_TO 4
1186 #define FROM_DIRECTION 1
1187 #define FROM_LOOP ucs2reverse_internal_loop
1188 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1189 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1190 #define ONE_DIRECTION 1
1192 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1193 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1194 #define LOOPFCT FROM_LOOP
1196 *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr); \
1198 #include <iconv/loop.c>
1199 #include <iconv/skeleton.c>
1202 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1203 #define DEFINE_INIT 0
1204 #define DEFINE_FINI 0
1205 #define MIN_NEEDED_FROM 4
1206 #define MIN_NEEDED_TO 2
1207 #define FROM_DIRECTION 1
1208 #define FROM_LOOP internal_ucs2reverse_loop
1209 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1210 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1211 #define ONE_DIRECTION 1
1213 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1214 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1215 #define LOOPFCT FROM_LOOP
1218 uint32_t val = *((uint32_t *) inptr); \
1219 if (__builtin_expect (val, 0) >= 0x10000) \
1221 STANDARD_ERR_HANDLER (4); \
1223 *((uint16_t *) outptr)++ = bswap_16 (val); \
1226 #define LOOP_NEED_FLAGS
1227 #include <iconv/loop.c>
1228 #include <iconv/skeleton.c>