Update.
[glibc.git] / iconv / gconv_simple.c
blob019aac2a3ff57aec8250cf860976186adcffe2c4
1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
32 #ifndef EILSEQ
33 # define EILSEQ EINVAL
34 #endif
/* Lookup tables shared by the UTF-8 encoder further down in this file.  */

/* Entry N - 2 has every bit set that a character must NOT use if it is
   to be encoded in N bytes (N = 2 .. 5).  */
static const uint32_t encoding_mask[] =
{
  0xfffff800, 0xffff0000, 0xffe00000, 0xfc000000
};

/* Entry N - 2 is the marker stored in the first byte of an N byte
   sequence (N = 2 .. 6).  */
static const unsigned char encoding_byte[] =
{
  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4


/* Convert as many complete 4-byte characters as fit in both buffers.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced past the converted
   data.  STEP, STEP_DATA and IRREVERSIBLE are not used.  The buffers
   are accessed as uint32_t objects, so they must be suitably aligned.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* Store first, then advance: a cast expression is not an lvalue, so
     the pointer cannot be incremented through the cast.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop which only uses byte accesses and can
   therefore be used with buffers of any alignment.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced past the converted
   data.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT otherwise.  The checks are
   made in the same order as in internal_ucs4_loop.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  Exhausted input takes precedence, and the
     output counts as full as soon as it cannot hold another complete
     character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Convert a single character whose bytes may be split between the
   conversion state and the input buffer.

   Bytes already saved in STEP_DATA->__statep (their number is kept in
   the low three bits of __count) are completed with bytes taken from
   *INPTRP.  If four bytes are still not available, everything read so
   far is kept in the state and __GCONV_INCOMPLETE_INPUT is returned.
   Otherwise the character is written in big endian byte order to
   *OUTPTRP, which is advanced by four, the saved-byte count is cleared
   and __GCONV_OK is returned.  STEP, OUTEND and IRREVERSIBLE are not
   used; no check for available output space is made here.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Copy byte by byte: this needs no particular alignment of the
     output buffer and makes it obvious that *OUTPTRP is what gets
     advanced afterwards.  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
197 #include <iconv/skeleton.c>
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal


/* Convert big endian UCS4 characters to the internal format.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced.  Values above
   0x7fffffff are invalid: if __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags such a character is skipped and *IRREVERSIBLE is
   incremented, otherwise the pointers are left at the offending
   character and __GCONV_ILLEGAL_INPUT is returned.  STEP is not used.
   The buffers are accessed as uint32_t objects, so they must be
   suitably aligned.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT in all other cases.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      /* The comparison must be made on the uint32_t value itself;
         __builtin_expect yields a `long', which cannot hold values
         above 0x7fffffff where `long' has 32 bits.  */
      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store first, then advance: a cast expression is not an
         lvalue.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop which only uses byte accesses and can
   therefore be used with buffers of any alignment.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced.  A character whose
   most significant (first) byte is above 0x7f lies outside the UCS4
   range: if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags it is
   skipped and *IRREVERSIBLE is incremented, otherwise the pointers are
   left at the offending character and __GCONV_ILLEGAL_INPUT is
   returned.  STEP is not used.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT in all other cases.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* A first byte of 0x80 already means a value of at least
         0x80000000, so everything above 0x7f has to be rejected, just
         as ucs4_internal_loop rejects values above 0x7fffffff.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Convert a single big endian UCS4 character whose bytes may be split
   between the conversion state and the input buffer.

   Bytes already saved in STEP_DATA->__statep (their number is kept in
   the low three bits of __count) are completed with bytes taken from
   *INPTRP.  If four bytes are still not available, everything read so
   far is kept in the state and __GCONV_INCOMPLETE_INPUT is returned.

   A character whose first byte is above 0x7f is invalid.  Unless
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, *INPTRP is moved
   back over the bytes consumed by this call and __GCONV_ILLEGAL_INPUT
   is returned; with the flag set the character is dropped.  A valid
   character is written in host byte order to *OUTPTRP, which is
   advanced by four.  In both of the latter cases the saved-byte count
   is cleared and __GCONV_OK is returned.  STEP, OUTEND and
   IRREVERSIBLE are not used.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* A first byte of 0x80 already means a value of at least 0x80000000,
     so everything above 0x7f has to be rejected.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
397 #include <iconv/skeleton.c>
/* Similarly for the little endian form.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le


/* Convert as many complete 4-byte characters as fit in both buffers
   from the internal format to little endian UCS4.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced past the converted
   data.  STEP, STEP_DATA and IRREVERSIBLE are not used.  On big endian
   hosts the buffers are accessed as uint32_t objects, so they must be
   suitably aligned.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* Store first, then advance: a cast expression is not an lvalue, so
     the pointer cannot be incremented through the cast.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop which only uses byte accesses and
   can therefore be used with buffers of any alignment.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced past the converted
   data.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Convert a single character whose bytes may be split between the
   conversion state and the input buffer to little endian UCS4.

   Bytes already saved in STEP_DATA->__statep (their number is kept in
   the low three bits of __count) are completed with bytes taken from
   *INPTRP.  If four bytes are still not available, everything read so
   far is kept in the state and __GCONV_INCOMPLETE_INPUT is returned.
   Otherwise the character is written in little endian byte order to
   *OUTPTRP, which is advanced by four, the saved-byte count is cleared
   and __GCONV_OK is returned.  STEP, OUTEND and IRREVERSIBLE are not
   used; no check for available output space is made here.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Copy byte by byte on every host.  This needs no particular
     alignment of the output buffer, and the shared increment below
     guarantees that the caller's output pointer is advanced.  */
#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
541 #include <iconv/skeleton.c>
/* And finally from UCS4-LE to the internal encoding.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal


/* Convert little endian UCS4 characters to the internal format.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced.  Values above
   0x7fffffff are invalid: if __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags such a character is skipped and *IRREVERSIBLE is
   incremented, otherwise the pointers are left at the offending
   character and __GCONV_ILLEGAL_INPUT is returned.  STEP is not used.
   The buffers are accessed as uint32_t objects, so they must be
   suitably aligned.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT in all other cases.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      /* The comparison must be made on the uint32_t value itself;
         __builtin_expect yields a `long', which cannot hold values
         above 0x7fffffff where `long' has 32 bits.  */
      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got, exactly as ucs4_internal_loop does;
             otherwise the characters converted so far would be lost to
             the caller.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store first, then advance: a cast expression is not an
         lvalue.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop which only uses byte accesses and
   can therefore be used with buffers of any alignment.

   Input is read from *INPTRP up to INEND, output is written from
   *OUTPTRP up to OUTEND; both pointers are advanced.  A character whose
   most significant (last) byte is above 0x7f lies outside the UCS4
   range: if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags it is
   skipped and *IRREVERSIBLE is incremented, otherwise the pointers are
   left at the offending character and __GCONV_ILLEGAL_INPUT is
   returned.  STEP is not used.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space
   remain, and __GCONV_INCOMPLETE_INPUT in all other cases.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* A top byte of 0x80 already means a value of at least
         0x80000000, so everything above 0x7f has to be rejected, just
         as ucs4le_internal_loop rejects values above 0x7fffffff.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Convert a single little endian UCS4 character whose bytes may be
   split between the conversion state and the input buffer.

   Bytes already saved in STEP_DATA->__statep (their number is kept in
   the low three bits of __count) are completed with bytes taken from
   *INPTRP.  If four bytes are still not available, everything read so
   far is kept in the state and __GCONV_INCOMPLETE_INPUT is returned.

   A character whose last (most significant) byte is above 0x7f is
   invalid.  Unless __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags,
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character
   is dropped.  A valid character is written in host byte order to
   *OUTPTRP, which is advanced by four.  In both of the latter cases
   the saved-byte count is cleared and __GCONV_OK is returned.  STEP,
   OUTEND and IRREVERSIBLE are not used.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* A top byte of 0x80 already means a value of at least 0x80000000,
     so everything above 0x7f has to be rejected.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      /* NOTE(review): ucs4_internal_loop_single moves *INPTRP back over
         the bytes consumed by this call before returning the error;
         this function never did.  Confirm which behaviour the caller
         expects.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        return __GCONV_ILLEGAL_INPUT;
    }
  else
    {
      /* The second branch must be taken on little endian hosts; testing
         for big endian twice would leave the output unwritten there.  */
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
736 #include <iconv/skeleton.c>
/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ascii_internal_loop
#define TO_LOOP                 ascii_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ascii_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  The code relies on the surrounding
   loop to provide `inptr', `outptr', `result', `irreversible' and
   `ignore_errors_p'.  A byte above 0x7f is either skipped and counted
   in *irreversible (errors ignored) or ends the loop with
   __GCONV_ILLEGAL_INPUT; every other byte is widened to 32 bits.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*inptr > 0x7f, 0))                                  \
      {                                                                       \
        /* The value is too large.  We don't try transliteration here since   \
           this is not an error because of the lack of possibilities to       \
           represent the result.  This is a genuine bug in the input since    \
           ASCII does not allow such values.  */                              \
        if (! ignore_errors_p ())                                             \
          {                                                                   \
            /* This is no correct ANSI_X3.4-1968 character.  */               \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
                                                                              \
        ++*irreversible;                                                      \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one byte sequence.  Store first, then advance: a cast       \
           expression is not an lvalue.  */                                   \
        /* XXX unaligned.  */                                                 \
        *((uint32_t *) outptr) = *inptr++;                                    \
        outptr += sizeof (uint32_t);                                          \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
777 #include <iconv/loop.c>
778 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ascii_loop
#define TO_LOOP                 internal_ascii_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ascii
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  The code relies on the surrounding
   loop to provide `inptr', `outptr' and STANDARD_ERR_HANDLER.  Values
   above 0x7f are passed to STANDARD_ERR_HANDLER; everything else is
   stored as a single byte.  The comparison is made on the 32-bit value
   itself since __builtin_expect yields a `long', which cannot hold
   large values where `long' has 32 bits.  */
#define BODY \
  {                                                                           \
    /* XXX unaligned.  */                                                     \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))             \
      {                                                                       \
        STANDARD_ERR_HANDLER (4);                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one byte sequence.  Load first, then advance: a cast        \
           expression is not an lvalue.  */                                   \
        *outptr++ = *((const uint32_t *) inptr);                              \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
807 #include <iconv/loop.c>
808 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UTF-8.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define MAX_NEEDED_TO           6
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_utf8_loop
#define TO_LOOP                 internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_utf8
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  The code relies on the surrounding
   loop to provide `inptr', `outptr', `outend' and `result'.  Values
   below 0x80 are copied as one byte.  For everything else the number
   of bytes is found with encoding_mask, the loop is left with
   __GCONV_FULL_OUTPUT if they do not fit, and otherwise the sequence
   is written back to front: six payload bits per trailing byte, the
   remaining bits merged into the marker from encoding_byte.  */
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    /* Since we control every character we read this cannot happen.  */       \
    assert (wc <= 0x7fffffff);                                                \
                                                                              \
    if (wc < 0x80)                                                            \
      /* It's a one byte sequence.  */                                        \
      *outptr++ = (unsigned char) wc;                                         \
    else                                                                      \
      {                                                                       \
        size_t step;                                                          \
        /* Same type as `outptr', so no pointer conversion between            \
           differently signed character types is needed.  */                  \
        unsigned char *start;                                                 \
                                                                              \
        for (step = 2; step < 6; ++step)                                      \
          if ((wc & encoding_mask[step - 2]) == 0)                            \
            break;                                                            \
                                                                              \
        if (__builtin_expect (outptr + step > outend, 0))                     \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        start = outptr;                                                       \
        *outptr = encoding_byte[step - 2];                                    \
        outptr += step;                                                       \
        --step;                                                               \
        do                                                                    \
          {                                                                   \
            start[step] = 0x80 | (wc & 0x3f);                                 \
            wc >>= 6;                                                         \
          }                                                                   \
        while (--step > 0);                                                   \
        start[0] |= wc;                                                       \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
868 #include <iconv/loop.c>
869 #include <iconv/skeleton.c>
/* Convert from UTF-8 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  The code relies on the surrounding
   loop to provide `inptr', `inend', `outptr', `result', `irreversible'
   and `ignore_errors_p'.  The first byte determines the length of the
   sequence; invalid first bytes, invalid trailing bytes and non-minimal
   encodings are either skipped and counted in *irreversible (errors
   ignored) or end the loop with __GCONV_ILLEGAL_INPUT.  A sequence cut
   off by the end of the input ends the loop with
   __GCONV_INCOMPLETE_INPUT.  */
#define BODY \
  {                                                                           \
    uint32_t ch;                                                              \
    uint_fast32_t cnt;                                                        \
    uint_fast32_t i;                                                          \
                                                                              \
    /* Next input byte.  */                                                   \
    ch = *inptr;                                                              \
                                                                              \
    if (ch < 0x80)                                                            \
      {                                                                       \
        /* One byte sequence.  */                                             \
        cnt = 1;                                                              \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        if (ch >= 0xc2 && ch < 0xe0)                                          \
          {                                                                   \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
               otherwise the wide character could have been represented       \
               using a single byte.  */                                       \
            cnt = 2;                                                          \
            ch &= 0x1f;                                                       \
          }                                                                   \
        else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0)                  \
          {                                                                   \
            /* We expect three bytes.  */                                     \
            cnt = 3;                                                          \
            ch &= 0x0f;                                                       \
          }                                                                   \
        else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0)                  \
          {                                                                   \
            /* We expect four bytes.  */                                      \
            cnt = 4;                                                          \
            ch &= 0x07;                                                       \
          }                                                                   \
        else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8)                  \
          {                                                                   \
            /* We expect five bytes.  */                                      \
            cnt = 5;                                                          \
            ch &= 0x03;                                                       \
          }                                                                   \
        else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc)                  \
          {                                                                   \
            /* We expect six bytes.  */                                       \
            cnt = 6;                                                          \
            ch &= 0x01;                                                       \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            int skipped;                                                      \
                                                                              \
            if (! ignore_errors_p ())                                         \
              {                                                               \
                /* This is an illegal encoding.  */                           \
                result = __GCONV_ILLEGAL_INPUT;                               \
                break;                                                        \
              }                                                               \
                                                                              \
            /* Search the end of this ill-formed UTF-8 character.  This       \
               is the next byte with (x & 0xc0) != 0x80.  */                  \
            skipped = 0;                                                      \
            do                                                                \
              {                                                               \
                ++inptr;                                                      \
                ++skipped;                                                    \
              }                                                               \
            while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5);  \
                                                                              \
            /* Count the dropped character like the other ignore paths        \
               below do.  */                                                  \
            ++*irreversible;                                                  \
            continue;                                                         \
          }                                                                   \
                                                                              \
        if (__builtin_expect (inptr + cnt > inend, 0))                        \
          {                                                                   \
            /* We don't have enough input.  But before we report that check   \
               that all the bytes are correct.  */                            \
            for (i = 1; inptr + i < inend; ++i)                               \
              if ((inptr[i] & 0xc0) != 0x80)                                  \
                break;                                                        \
                                                                              \
            if (__builtin_expect (inptr + i == inend, 1))                     \
              {                                                               \
                result = __GCONV_INCOMPLETE_INPUT;                            \
                break;                                                        \
              }                                                               \
                                                                              \
            /* This is an illegal character.  */                              \
            if (ignore_errors_p ())                                           \
              {                                                               \
                /* Ignore it.  */                                             \
                inptr += i;                                                   \
                ++*irreversible;                                              \
                continue;                                                     \
              }                                                               \
                                                                              \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
                                                                              \
        /* Read the possible remaining bytes.  */                             \
        for (i = 1; i < cnt; ++i)                                             \
          {                                                                   \
            uint32_t byte = inptr[i];                                         \
                                                                              \
            if ((byte & 0xc0) != 0x80)                                        \
              /* This is an illegal encoding.  */                             \
              break;                                                          \
                                                                              \
            ch <<= 6;                                                         \
            ch |= byte & 0x3f;                                                \
          }                                                                   \
                                                                              \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.               \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could       \
           have been represented with fewer than cnt bytes.  */               \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))               \
          {                                                                   \
            /* This is an illegal encoding.  */                               \
            if (ignore_errors_p ())                                           \
              {                                                               \
                inptr += i;                                                   \
                ++*irreversible;                                              \
                continue;                                                     \
              }                                                               \
                                                                              \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
                                                                              \
        inptr += cnt;                                                         \
      }                                                                       \
                                                                              \
    /* Now adjust the pointers and store the result.  Store first, then       \
       advance: a cast expression is not an lvalue.  */                       \
    *((uint32_t *) outptr) = ch;                                              \
    outptr += sizeof (uint32_t);                                              \
  }
#define LOOP_NEED_FLAGS
/* Save the beginning of a multi-byte sequence which is cut off by the
   end of the input.  The code relies on the surrounding function to
   provide `state', `inptrp' and `inend'.  The number of available bytes
   goes into state->__count; their payload bits, shifted to the
   position they will have in the complete character, go into
   state->__value.__wch.  *inptrp is left equal to inend.  */
#define STORE_REST \
  {                                                                           \
    /* The caller guarantees that the first byte in the buffer is correct     \
       and that it announces more bytes than the input buffer holds.  */      \
    wint_t partial = **inptrp;                                                \
    size_t missing;                                                           \
                                                                              \
    state->__count = inend - *inptrp;                                         \
                                                                              \
    if (partial >= 0xc2 && partial < 0xe0)                                    \
      {                                                                       \
        /* Two byte sequence.  0xc0 and 0xc1 cannot occur as first byte,      \
           the character would fit into a single byte.  */                    \
        missing = 2;                                                          \
        partial &= 0x1f;                                                      \
      }                                                                       \
    else if (__builtin_expect (partial & 0xf0, 0xe0) == 0xe0)                 \
      {                                                                       \
        /* Three byte sequence.  */                                           \
        missing = 3;                                                          \
        partial &= 0x0f;                                                      \
      }                                                                       \
    else if (__builtin_expect (partial & 0xf8, 0xf0) == 0xf0)                 \
      {                                                                       \
        /* Four byte sequence.  */                                            \
        missing = 4;                                                          \
        partial &= 0x07;                                                      \
      }                                                                       \
    else if (__builtin_expect (partial & 0xfc, 0xf8) == 0xf8)                 \
      {                                                                       \
        /* Five byte sequence.  */                                            \
        missing = 5;                                                          \
        partial &= 0x03;                                                      \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Six byte sequence.  */                                             \
        missing = 6;                                                          \
        partial &= 0x01;                                                      \
      }                                                                       \
                                                                              \
    /* The first byte has been dealt with; now append the payload of          \
       every further byte which is available.  */                             \
    --missing;                                                                \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp))                           \
      {                                                                       \
        partial = (partial << 6) | (**inptrp & 0x3f);                         \
        --missing;                                                            \
      }                                                                       \
                                                                              \
    /* Leave room for the bytes which have not arrived yet.  */               \
    partial <<= missing * 6;                                                  \
                                                                              \
    state->__value.__wch = partial;                                           \
  }
/* Rebuild the bytes saved by STORE_REST.  The code relies on the
   surrounding function to provide `state', `inlen' and `bytebuf'.
   inlen is set to state->__count; the length of the whole sequence is
   derived from the magnitude of the saved value, and the first inlen
   bytes of the sequence are regenerated in bytebuf.  */
#define UNPACK_BYTES \
  {                                                                           \
    wint_t bits = state->__value.__wch;                                       \
    size_t pos;                                                               \
    inlen = state->__count;                                                   \
                                                                              \
    if (bits <= 0x7ff)                                                        \
      {                                                                       \
        bytebuf[0] = 0xc0;                                                    \
        pos = 2;                                                              \
      }                                                                       \
    else if (__builtin_expect (bits, 0) <= 0xffff)                            \
      {                                                                       \
        bytebuf[0] = 0xe0;                                                    \
        pos = 3;                                                              \
      }                                                                       \
    else if (__builtin_expect (bits, 0) <= 0x1fffff)                          \
      {                                                                       \
        bytebuf[0] = 0xf0;                                                    \
        pos = 4;                                                              \
      }                                                                       \
    else if (__builtin_expect (bits, 0) <= 0x3ffffff)                         \
      {                                                                       \
        bytebuf[0] = 0xf8;                                                    \
        pos = 5;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        bytebuf[0] = 0xfc;                                                    \
        pos = 6;                                                              \
      }                                                                       \
                                                                              \
    /* Walk from the last byte of the sequence towards the first one,         \
       writing only the positions which had actually been read.  */           \
    do                                                                        \
      {                                                                       \
        --pos;                                                                \
        if (pos < inlen)                                                      \
          bytebuf[pos] = 0x80 | (bits & 0x3f);                                \
        bits >>= 6;                                                           \
      }                                                                       \
    while (pos > 1);                                                          \
                                                                              \
    /* Whatever is left belongs into the first byte.  */                      \
    bytebuf[0] |= bits;                                                       \
  }
1129 #include <iconv/loop.c>
1130 #include <iconv/skeleton.c>
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character: a 16-bit value in host byte order is
   widened to 32 bits.  The code relies on the surrounding loop to
   provide `inptr' and `outptr'.  The pointers are advanced in separate
   statements since a cast expression is not an lvalue.  */
#define BODY \
  {                                                                           \
    *((uint32_t *) outptr) = *((const uint16_t *) inptr);                     \
    outptr += sizeof (uint32_t);                                              \
    inptr += sizeof (uint16_t);                                               \
  }
1149 #include <iconv/loop.c>
1150 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  The code relies on the surrounding
   loop to provide `inptr', `outptr' and STANDARD_ERR_HANDLER.  Values
   of 0x10000 and above do not fit into 16 bits and are passed to
   STANDARD_ERR_HANDLER; everything else is stored as a 16-bit value in
   host byte order.  The comparison is made on the 32-bit value itself
   since __builtin_expect yields a `long', which cannot hold large
   values where `long' has 32 bits.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*((const uint32_t *) inptr) >= 0x10000, 0))         \
      {                                                                       \
        STANDARD_ERR_HANDLER (4);                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Store first, then advance: a cast expression is not an             \
           lvalue.  */                                                        \
        *((uint16_t *) outptr) = *((const uint32_t *) inptr);                 \
        outptr += sizeof (uint16_t);                                          \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
1177 #include <iconv/loop.c>
1178 #include <iconv/skeleton.c>
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character: the two bytes of a 16-bit value are
   swapped and the result is widened to 32 bits.  The code relies on
   the surrounding loop to provide `inptr' and `outptr'.  The output
   pointer is advanced in a separate statement since a cast expression
   is not an lvalue.  */
#define BODY \
  {                                                                           \
    *((uint32_t *) outptr) = bswap_16 (*(const uint16_t *) inptr);            \
    outptr += sizeof (uint32_t);                                              \
    inptr += 2;                                                               \
  }
1198 #include <iconv/loop.c>
1199 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  The code relies on the surrounding
   loop to provide `inptr', `outptr' and STANDARD_ERR_HANDLER.  Values
   of 0x10000 and above do not fit into 16 bits and are passed to
   STANDARD_ERR_HANDLER, which presumably never falls through to the
   store below (TODO confirm against its definition); everything else
   is stored as a byte-swapped 16-bit value.  The comparison is made on
   the 32-bit value itself since __builtin_expect yields a `long',
   which cannot hold large values where `long' has 32 bits.  */
#define BODY \
  {                                                                           \
    uint32_t val = *((const uint32_t *) inptr);                               \
    if (__builtin_expect (val >= 0x10000, 0))                                 \
      {                                                                       \
        STANDARD_ERR_HANDLER (4);                                             \
      }                                                                       \
    /* Store first, then advance: a cast expression is not an lvalue.  */     \
    *((uint16_t *) outptr) = bswap_16 (val);                                  \
    outptr += sizeof (uint16_t);                                              \
    inptr += 4;                                                               \
  }
#define LOOP_NEED_FLAGS
1227 #include <iconv/loop.c>
1228 #include <iconv/skeleton.c>