Add some more ulps.
[glibc/pb-stable.git] / iconv / gconv_simple.c
blob70c43c8fe872e6a0486533b787f0743ef0d3ab34
1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
32 #ifndef EILSEQ
33 # define EILSEQ EINVAL
34 #endif
37 /* Transform from the internal, UCS4-like format, to UCS4. The
38 difference between the internal ucs4 format and the real UCS4
39 format is, if any, the endianess. The Unicode/ISO 10646 says that
40 unless some higher protocol specifies it differently, the byte
41 order is big endian.*/
42 #define DEFINE_INIT 0
43 #define DEFINE_FINI 0
44 #define MIN_NEEDED_FROM 4
45 #define MIN_NEEDED_TO 4
46 #define FROM_DIRECTION 1
47 #define FROM_LOOP internal_ucs4_loop
48 #define TO_LOOP internal_ucs4_loop /* This is not used. */
49 #define FUNCTION_NAME __gconv_transform_internal_ucs4
52 static inline int
53 internal_ucs4_loop (struct __gconv_step *step,
54 struct __gconv_step_data *step_data,
55 const unsigned char **inptrp, const unsigned char *inend,
56 unsigned char **outptrp, unsigned char *outend,
57 size_t *irreversible)
59 const unsigned char *inptr = *inptrp;
60 unsigned char *outptr = *outptrp;
61 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
62 int result;
64 #if __BYTE_ORDER == __LITTLE_ENDIAN
65 /* Sigh, we have to do some real work. */
66 size_t cnt;
68 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
69 *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
71 *inptrp = inptr;
72 *outptrp = outptr;
73 #elif __BYTE_ORDER == __BIG_ENDIAN
74 /* Simply copy the data. */
75 *inptrp = inptr + n_convert * 4;
76 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
77 #else
78 # error "This endianess is not supported."
79 #endif
81 /* Determine the status. */
82 if (*inptrp == inend)
83 result = __GCONV_EMPTY_INPUT;
84 else if (*outptrp == outend)
85 result = __GCONV_FULL_OUTPUT;
86 else
87 result = __GCONV_INCOMPLETE_INPUT;
89 return result;
92 #ifndef _STRING_ARCH_unaligned
93 static inline int
94 internal_ucs4_loop_unaligned (struct __gconv_step *step,
95 struct __gconv_step_data *step_data,
96 const unsigned char **inptrp,
97 const unsigned char *inend,
98 unsigned char **outptrp, unsigned char *outend,
99 size_t *irreversible)
101 const unsigned char *inptr = *inptrp;
102 unsigned char *outptr = *outptrp;
103 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
104 int result;
106 # if __BYTE_ORDER == __LITTLE_ENDIAN
107 /* Sigh, we have to do some real work. */
108 size_t cnt;
110 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
112 outptr[0] = inptr[3];
113 outptr[1] = inptr[2];
114 outptr[2] = inptr[1];
115 outptr[3] = inptr[0];
118 *inptrp = inptr;
119 *outptrp = outptr;
120 # elif __BYTE_ORDER == __BIG_ENDIAN
121 /* Simply copy the data. */
122 *inptrp = inptr + n_convert * 4;
123 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
124 # else
125 # error "This endianess is not supported."
126 # endif
128 /* Determine the status. */
129 if (*outptrp == outend)
130 result = __GCONV_FULL_OUTPUT;
131 else if (*inptrp == inend)
132 result = __GCONV_EMPTY_INPUT;
133 else
134 result = __GCONV_INCOMPLETE_INPUT;
136 return result;
138 #endif
141 static inline int
142 internal_ucs4_loop_single (struct __gconv_step *step,
143 struct __gconv_step_data *step_data,
144 const unsigned char **inptrp,
145 const unsigned char *inend,
146 unsigned char **outptrp, unsigned char *outend,
147 size_t *irreversible)
149 mbstate_t *state = step_data->__statep;
150 size_t cnt = state->__count & 7;
152 while (*inptrp < inend && cnt < 4)
153 state->__value.__wchb[cnt++] = *(*inptrp)++;
155 if (__builtin_expect (cnt, 4) < 4)
157 /* Still not enough bytes. Store the ones in the input buffer. */
158 state->__count &= ~7;
159 state->__count |= cnt;
161 return __GCONV_INCOMPLETE_INPUT;
164 #if __BYTE_ORDER == __LITTLE_ENDIAN
165 (*outptrp)[0] = state->__value.__wchb[3];
166 (*outptrp)[1] = state->__value.__wchb[2];
167 (*outptrp)[2] = state->__value.__wchb[1];
168 (*outptrp)[3] = state->__value.__wchb[0];
170 *outptrp += 4;
171 #elif __BYTE_ORDER == __BIG_ENDIAN
172 /* XXX unaligned */
173 *(*((uint32_t **) outptrp)++) = state->__value.__wch;
174 #else
175 # error "This endianess is not supported."
176 #endif
178 /* Clear the state buffer. */
179 state->__count &= ~7;
181 return __GCONV_OK;
184 #include <iconv/skeleton.c>
187 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
188 for the other direction we have to check for correct values here. */
189 #define DEFINE_INIT 0
190 #define DEFINE_FINI 0
191 #define MIN_NEEDED_FROM 4
192 #define MIN_NEEDED_TO 4
193 #define FROM_DIRECTION 1
194 #define FROM_LOOP ucs4_internal_loop
195 #define TO_LOOP ucs4_internal_loop /* This is not used. */
196 #define FUNCTION_NAME __gconv_transform_ucs4_internal
199 static inline int
200 ucs4_internal_loop (struct __gconv_step *step,
201 struct __gconv_step_data *step_data,
202 const unsigned char **inptrp, const unsigned char *inend,
203 unsigned char **outptrp, unsigned char *outend,
204 size_t *irreversible)
206 int flags = step_data->__flags;
207 const unsigned char *inptr = *inptrp;
208 unsigned char *outptr = *outptrp;
209 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
210 int result;
211 size_t cnt;
213 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
215 uint32_t inval;
217 #if __BYTE_ORDER == __LITTLE_ENDIAN
218 inval = bswap_32 (*(uint32_t *) inptr);
219 #else
220 inval = *(uint32_t *) inptr;
221 #endif
223 if (__builtin_expect (inval, 0) > 0x7fffffff)
225 /* The value is too large. We don't try transliteration here since
226 this is not an error because of the lack of possibilities to
227 represent the result. This is a genuine bug in the input since
228 UCS4 does not allow such values. */
229 if (irreversible == NULL)
230 /* We are transliterating, don't try to correct anything. */
231 return __GCONV_ILLEGAL_INPUT;
233 if (flags & __GCONV_IGNORE_ERRORS)
235 /* Just ignore this character. */
236 ++*irreversible;
237 continue;
240 *inptrp = inptr;
241 *outptrp = outptr;
242 return __GCONV_ILLEGAL_INPUT;
245 *((uint32_t *) outptr)++ = inval;
248 *inptrp = inptr;
249 *outptrp = outptr;
251 /* Determine the status. */
252 if (*inptrp == inend)
253 result = __GCONV_EMPTY_INPUT;
254 else if (*outptrp == outend)
255 result = __GCONV_FULL_OUTPUT;
256 else
257 result = __GCONV_INCOMPLETE_INPUT;
259 return result;
262 #ifndef _STRING_ARCH_unaligned
263 static inline int
264 ucs4_internal_loop_unaligned (struct __gconv_step *step,
265 struct __gconv_step_data *step_data,
266 const unsigned char **inptrp,
267 const unsigned char *inend,
268 unsigned char **outptrp, unsigned char *outend,
269 size_t *irreversible)
271 int flags = step_data->__flags;
272 const unsigned char *inptr = *inptrp;
273 unsigned char *outptr = *outptrp;
274 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
275 int result;
276 size_t cnt;
278 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
280 if (__builtin_expect (inptr[0], 0) > 0x80)
282 /* The value is too large. We don't try transliteration here since
283 this is not an error because of the lack of possibilities to
284 represent the result. This is a genuine bug in the input since
285 UCS4 does not allow such values. */
286 if (irreversible == NULL)
287 /* We are transliterating, don't try to correct anything. */
288 return __GCONV_ILLEGAL_INPUT;
290 if (flags & __GCONV_IGNORE_ERRORS)
292 /* Just ignore this character. */
293 ++*irreversible;
294 continue;
297 *inptrp = inptr;
298 *outptrp = outptr;
299 return __GCONV_ILLEGAL_INPUT;
302 # if __BYTE_ORDER == __LITTLE_ENDIAN
303 outptr[3] = inptr[0];
304 outptr[2] = inptr[1];
305 outptr[1] = inptr[2];
306 outptr[0] = inptr[3];
307 # else
308 outptr[0] = inptr[0];
309 outptr[1] = inptr[1];
310 outptr[2] = inptr[2];
311 outptr[3] = inptr[3];
312 # endif
313 outptr += 4;
316 *inptrp = inptr;
317 *outptrp = outptr;
319 /* Determine the status. */
320 if (*inptrp == inend)
321 result = __GCONV_EMPTY_INPUT;
322 else if (*outptrp == outend)
323 result = __GCONV_FULL_OUTPUT;
324 else
325 result = __GCONV_INCOMPLETE_INPUT;
327 return result;
329 #endif
332 static inline int
333 ucs4_internal_loop_single (struct __gconv_step *step,
334 struct __gconv_step_data *step_data,
335 const unsigned char **inptrp,
336 const unsigned char *inend,
337 unsigned char **outptrp, unsigned char *outend,
338 size_t *irreversible)
340 mbstate_t *state = step_data->__statep;
341 int flags = step_data->__flags;
342 size_t cnt = state->__count & 7;
344 while (*inptrp < inend && cnt < 4)
345 state->__value.__wchb[cnt++] = *(*inptrp)++;
347 if (__builtin_expect (cnt, 4) < 4)
349 /* Still not enough bytes. Store the ones in the input buffer. */
350 state->__count &= ~7;
351 state->__count |= cnt;
353 return __GCONV_INCOMPLETE_INPUT;
356 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0], 0)
357 > 0x80)
359 /* The value is too large. We don't try transliteration here since
360 this is not an error because of the lack of possibilities to
361 represent the result. This is a genuine bug in the input since
362 UCS4 does not allow such values. */
363 if (!(flags & __GCONV_IGNORE_ERRORS))
365 *inptrp -= cnt - (state->__count & 7);
366 return __GCONV_ILLEGAL_INPUT;
369 else
371 #if __BYTE_ORDER == __LITTLE_ENDIAN
372 (*outptrp)[0] = state->__value.__wchb[3];
373 (*outptrp)[1] = state->__value.__wchb[2];
374 (*outptrp)[2] = state->__value.__wchb[1];
375 (*outptrp)[3] = state->__value.__wchb[0];
376 #elif __BYTE_ORDER == __BIG_ENDIAN
377 (*outptrp)[0] = state->__value.__wchb[0];
378 (*outptrp)[1] = state->__value.__wchb[1];
379 (*outptrp)[2] = state->__value.__wchb[2];
380 (*outptrp)[3] = state->__value.__wchb[3];
381 #endif
383 *outptrp += 4;
386 /* Clear the state buffer. */
387 state->__count &= ~7;
389 return __GCONV_OK;
392 #include <iconv/skeleton.c>
395 /* Similarly for the little endian form. */
396 #define DEFINE_INIT 0
397 #define DEFINE_FINI 0
398 #define MIN_NEEDED_FROM 4
399 #define MIN_NEEDED_TO 4
400 #define FROM_DIRECTION 1
401 #define FROM_LOOP internal_ucs4le_loop
402 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
403 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
406 static inline int
407 internal_ucs4le_loop (struct __gconv_step *step,
408 struct __gconv_step_data *step_data,
409 const unsigned char **inptrp, const unsigned char *inend,
410 unsigned char **outptrp, unsigned char *outend,
411 size_t *irreversible)
413 const unsigned char *inptr = *inptrp;
414 unsigned char *outptr = *outptrp;
415 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
416 int result;
418 #if __BYTE_ORDER == __BIG_ENDIAN
419 /* Sigh, we have to do some real work. */
420 size_t cnt;
422 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
423 *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
425 *inptrp = inptr;
426 *outptrp = outptr;
427 #elif __BYTE_ORDER == __LITTLE_ENDIAN
428 /* Simply copy the data. */
429 *inptrp = inptr + n_convert * 4;
430 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
431 #else
432 # error "This endianess is not supported."
433 #endif
435 /* Determine the status. */
436 if (*inptrp == inend)
437 result = __GCONV_EMPTY_INPUT;
438 else if (*outptrp == outend)
439 result = __GCONV_FULL_OUTPUT;
440 else
441 result = __GCONV_INCOMPLETE_INPUT;
443 return result;
446 #ifndef _STRING_ARCH_unaligned
447 static inline int
448 internal_ucs4le_loop_unaligned (struct __gconv_step *step,
449 struct __gconv_step_data *step_data,
450 const unsigned char **inptrp,
451 const unsigned char *inend,
452 unsigned char **outptrp, unsigned char *outend,
453 size_t *irreversible)
455 const unsigned char *inptr = *inptrp;
456 unsigned char *outptr = *outptrp;
457 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
458 int result;
460 # if __BYTE_ORDER == __BIG_ENDIAN
461 /* Sigh, we have to do some real work. */
462 size_t cnt;
464 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
466 outptr[0] = inptr[3];
467 outptr[1] = inptr[2];
468 outptr[2] = inptr[1];
469 outptr[3] = inptr[0];
472 *inptrp = inptr;
473 *outptrp = outptr;
474 # elif __BYTE_ORDER == __LITTLE_ENDIAN
475 /* Simply copy the data. */
476 *inptrp = inptr + n_convert * 4;
477 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
478 # else
479 # error "This endianess is not supported."
480 # endif
482 /* Determine the status. */
483 if (*inptrp == inend)
484 result = __GCONV_EMPTY_INPUT;
485 else if (*outptrp == outend)
486 result = __GCONV_FULL_OUTPUT;
487 else
488 result = __GCONV_INCOMPLETE_INPUT;
490 return result;
492 #endif
495 static inline int
496 internal_ucs4le_loop_single (struct __gconv_step *step,
497 struct __gconv_step_data *step_data,
498 const unsigned char **inptrp,
499 const unsigned char *inend,
500 unsigned char **outptrp, unsigned char *outend,
501 size_t *irreversible)
503 mbstate_t *state = step_data->__statep;
504 size_t cnt = state->__count & 7;
506 while (*inptrp < inend && cnt < 4)
507 state->__value.__wchb[cnt++] = *(*inptrp)++;
509 if (__builtin_expect (cnt, 4) < 4)
511 /* Still not enough bytes. Store the ones in the input buffer. */
512 state->__count &= ~7;
513 state->__count |= cnt;
515 return __GCONV_INCOMPLETE_INPUT;
518 #if __BYTE_ORDER == __BIG_ENDIAN
519 (*outptrp)[0] = state->__value.__wchb[3];
520 (*outptrp)[1] = state->__value.__wchb[2];
521 (*outptrp)[2] = state->__value.__wchb[1];
522 (*outptrp)[3] = state->__value.__wchb[0];
524 *outptrp += 4;
525 #else
526 /* XXX unaligned */
527 *(*((uint32_t **) outptrp)++) = state->__value.__wch;
528 #endif
530 /* Clear the state buffer. */
531 state->__count &= ~7;
533 return __GCONV_OK;
536 #include <iconv/skeleton.c>
539 /* And finally from UCS4-LE to the internal encoding. */
540 #define DEFINE_INIT 0
541 #define DEFINE_FINI 0
542 #define MIN_NEEDED_FROM 4
543 #define MIN_NEEDED_TO 4
544 #define FROM_DIRECTION 1
545 #define FROM_LOOP ucs4le_internal_loop
546 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
547 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
550 static inline int
551 ucs4le_internal_loop (struct __gconv_step *step,
552 struct __gconv_step_data *step_data,
553 const unsigned char **inptrp, const unsigned char *inend,
554 unsigned char **outptrp, unsigned char *outend,
555 size_t *irreversible)
557 int flags = step_data->__flags;
558 const unsigned char *inptr = *inptrp;
559 unsigned char *outptr = *outptrp;
560 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
561 int result;
562 size_t cnt;
564 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
566 uint32_t inval;
568 #if __BYTE_ORDER == __BIG_ENDIAN
569 inval = bswap_32 (*(uint32_t *) inptr);
570 #else
571 inval = *(uint32_t *) inptr;
572 #endif
574 if (__builtin_expect (inval, 0) > 0x7fffffff)
576 /* The value is too large. We don't try transliteration here since
577 this is not an error because of the lack of possibilities to
578 represent the result. This is a genuine bug in the input since
579 UCS4 does not allow such values. */
580 if (irreversible == NULL)
581 /* We are transliterating, don't try to correct anything. */
582 return __GCONV_ILLEGAL_INPUT;
584 if (flags & __GCONV_IGNORE_ERRORS)
586 /* Just ignore this character. */
587 ++*irreversible;
588 continue;
591 return __GCONV_ILLEGAL_INPUT;
594 *((uint32_t *) outptr)++ = inval;
597 *inptrp = inptr;
598 *outptrp = outptr;
600 /* Determine the status. */
601 if (*inptrp == inend)
602 result = __GCONV_EMPTY_INPUT;
603 else if (*outptrp == outend)
604 result = __GCONV_FULL_OUTPUT;
605 else
606 result = __GCONV_INCOMPLETE_INPUT;
608 return result;
611 #ifndef _STRING_ARCH_unaligned
612 static inline int
613 ucs4le_internal_loop_unaligned (struct __gconv_step *step,
614 struct __gconv_step_data *step_data,
615 const unsigned char **inptrp,
616 const unsigned char *inend,
617 unsigned char **outptrp, unsigned char *outend,
618 size_t *irreversible)
620 int flags = step_data->__flags;
621 const unsigned char *inptr = *inptrp;
622 unsigned char *outptr = *outptrp;
623 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
624 int result;
625 size_t cnt;
627 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
629 if (__builtin_expect (inptr[3], 0) > 0x80)
631 /* The value is too large. We don't try transliteration here since
632 this is not an error because of the lack of possibilities to
633 represent the result. This is a genuine bug in the input since
634 UCS4 does not allow such values. */
635 if (irreversible == NULL)
636 /* We are transliterating, don't try to correct anything. */
637 return __GCONV_ILLEGAL_INPUT;
639 if (flags & __GCONV_IGNORE_ERRORS)
641 /* Just ignore this character. */
642 ++*irreversible;
643 continue;
646 *inptrp = inptr;
647 *outptrp = outptr;
648 return __GCONV_ILLEGAL_INPUT;
651 # if __BYTE_ORDER == __BIG_ENDIAN
652 outptr[3] = inptr[0];
653 outptr[2] = inptr[1];
654 outptr[1] = inptr[2];
655 outptr[0] = inptr[3];
656 # else
657 outptr[0] = inptr[0];
658 outptr[1] = inptr[1];
659 outptr[2] = inptr[2];
660 outptr[3] = inptr[3];
661 # endif
663 outptr += 4;
666 *inptrp = inptr;
667 *outptrp = outptr;
669 /* Determine the status. */
670 if (*inptrp == inend)
671 result = __GCONV_EMPTY_INPUT;
672 else if (*outptrp == outend)
673 result = __GCONV_FULL_OUTPUT;
674 else
675 result = __GCONV_INCOMPLETE_INPUT;
677 return result;
679 #endif
682 static inline int
683 ucs4le_internal_loop_single (struct __gconv_step *step,
684 struct __gconv_step_data *step_data,
685 const unsigned char **inptrp,
686 const unsigned char *inend,
687 unsigned char **outptrp, unsigned char *outend,
688 size_t *irreversible)
690 mbstate_t *state = step_data->__statep;
691 int flags = step_data->__flags;
692 size_t cnt = state->__count & 7;
694 while (*inptrp < inend && cnt < 4)
695 state->__value.__wchb[cnt++] = *(*inptrp)++;
697 if (__builtin_expect (cnt, 4) < 4)
699 /* Still not enough bytes. Store the ones in the input buffer. */
700 state->__count &= ~7;
701 state->__count |= cnt;
703 return __GCONV_INCOMPLETE_INPUT;
706 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3], 0)
707 > 0x80)
709 /* The value is too large. We don't try transliteration here since
710 this is not an error because of the lack of possibilities to
711 represent the result. This is a genuine bug in the input since
712 UCS4 does not allow such values. */
713 if (!(flags & __GCONV_IGNORE_ERRORS))
714 return __GCONV_ILLEGAL_INPUT;
716 else
718 #if __BYTE_ORDER == __BIG_ENDIAN
719 (*outptrp)[0] = state->__value.__wchb[3];
720 (*outptrp)[1] = state->__value.__wchb[2];
721 (*outptrp)[2] = state->__value.__wchb[1];
722 (*outptrp)[3] = state->__value.__wchb[0];
723 #elif __BYTE_ORDER == __BIG_ENDIAN
724 (*outptrp)[0] = state->__value.__wchb[0];
725 (*outptrp)[1] = state->__value.__wchb[1];
726 (*outptrp)[2] = state->__value.__wchb[2];
727 (*outptrp)[3] = state->__value.__wchb[3];
728 #endif
730 *outptrp += 4;
733 /* Clear the state buffer. */
734 state->__count &= ~7;
736 return __GCONV_OK;
739 #include <iconv/skeleton.c>
742 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
743 #define DEFINE_INIT 0
744 #define DEFINE_FINI 0
745 #define MIN_NEEDED_FROM 1
746 #define MIN_NEEDED_TO 4
747 #define FROM_DIRECTION 1
748 #define FROM_LOOP ascii_internal_loop
749 #define TO_LOOP ascii_internal_loop /* This is not used. */
750 #define FUNCTION_NAME __gconv_transform_ascii_internal
751 #define ONE_DIRECTION 1
753 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
754 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
755 #define LOOPFCT FROM_LOOP
756 #define BODY \
758 if (__builtin_expect (*inptr, 0) > '\x7f') \
760 /* The value is too large. We don't try transliteration here since \
761 this is not an error because of the lack of possibilities to \
762 represent the result. This is a genuine bug in the input since \
763 ASCII does not allow such values. */ \
764 if (! ignore_errors_p ()) \
766 /* This is no correct ANSI_X3.4-1968 character. */ \
767 result = __GCONV_ILLEGAL_INPUT; \
768 break; \
771 ++*irreversible; \
772 ++inptr; \
774 else \
775 /* It's an one byte sequence. */ \
776 *((uint32_t *) outptr)++ = *inptr++; \
778 #define LOOP_NEED_FLAGS
779 #include <iconv/loop.c>
780 #include <iconv/skeleton.c>
783 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
784 #define DEFINE_INIT 0
785 #define DEFINE_FINI 0
786 #define MIN_NEEDED_FROM 4
787 #define MIN_NEEDED_TO 1
788 #define FROM_DIRECTION 1
789 #define FROM_LOOP internal_ascii_loop
790 #define TO_LOOP internal_ascii_loop /* This is not used. */
791 #define FUNCTION_NAME __gconv_transform_internal_ascii
792 #define ONE_DIRECTION 1
794 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
795 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
796 #define LOOPFCT FROM_LOOP
797 #define BODY \
799 if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \
801 STANDARD_ERR_HANDLER (4); \
803 else \
804 /* It's an one byte sequence. */ \
805 *outptr++ = *((uint32_t *) inptr)++; \
807 #define LOOP_NEED_FLAGS
808 #include <iconv/loop.c>
809 #include <iconv/skeleton.c>
812 /* Convert from the internal (UCS4-like) format to UTF-8. */
813 #define DEFINE_INIT 0
814 #define DEFINE_FINI 0
815 #define MIN_NEEDED_FROM 4
816 #define MIN_NEEDED_TO 1
817 #define MAX_NEEDED_TO 6
818 #define FROM_DIRECTION 1
819 #define FROM_LOOP internal_utf8_loop
820 #define TO_LOOP internal_utf8_loop /* This is not used. */
821 #define FUNCTION_NAME __gconv_transform_internal_utf8
822 #define ONE_DIRECTION 1
824 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
825 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
826 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
827 #define LOOPFCT FROM_LOOP
828 #define BODY \
830 uint32_t wc = *((uint32_t *) inptr); \
832 /* Since we control every character we read this cannot happen. */ \
833 assert (wc <= 0x7fffffff); \
835 if (wc < 0x80) \
836 /* It's an one byte sequence. */ \
837 *outptr++ = (unsigned char) wc; \
838 else \
840 size_t step; \
841 char *start; \
843 for (step = 2; step < 6; ++step) \
844 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
845 break; \
847 if (__builtin_expect (outptr + step > outend, 0)) \
849 /* Too long. */ \
850 result = __GCONV_FULL_OUTPUT; \
851 break; \
854 start = outptr; \
855 *outptr = (unsigned char) (~0xff >> step); \
856 outptr += step; \
857 --step; \
858 do \
860 start[step] = 0x80 | (wc & 0x3f); \
861 wc >>= 6; \
863 while (--step > 0); \
864 start[0] |= wc; \
867 inptr += 4; \
869 #include <iconv/loop.c>
870 #include <iconv/skeleton.c>
873 /* Convert from UTF-8 to the internal (UCS4-like) format. */
874 #define DEFINE_INIT 0
875 #define DEFINE_FINI 0
876 #define MIN_NEEDED_FROM 1
877 #define MAX_NEEDED_FROM 6
878 #define MIN_NEEDED_TO 4
879 #define FROM_DIRECTION 1
880 #define FROM_LOOP utf8_internal_loop
881 #define TO_LOOP utf8_internal_loop /* This is not used. */
882 #define FUNCTION_NAME __gconv_transform_utf8_internal
883 #define ONE_DIRECTION 1
885 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
886 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
887 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
888 #define LOOPFCT FROM_LOOP
889 #define BODY \
891 uint32_t ch; \
892 uint_fast32_t cnt; \
893 uint_fast32_t i; \
895 /* Next input byte. */ \
896 ch = *inptr; \
898 if (ch < 0x80) \
900 /* One byte sequence. */ \
901 cnt = 1; \
902 ++inptr; \
904 else \
906 if (ch >= 0xc2 && ch < 0xe0) \
908 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
909 otherwise the wide character could have been represented \
910 using a single byte. */ \
911 cnt = 2; \
912 ch &= 0x1f; \
914 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
916 /* We expect three bytes. */ \
917 cnt = 3; \
918 ch &= 0x0f; \
920 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
922 /* We expect four bytes. */ \
923 cnt = 4; \
924 ch &= 0x07; \
926 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
928 /* We expect five bytes. */ \
929 cnt = 5; \
930 ch &= 0x03; \
932 else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc) \
934 /* We expect six bytes. */ \
935 cnt = 6; \
936 ch &= 0x01; \
938 else \
940 int skipped; \
942 if (! ignore_errors_p ()) \
944 /* This is an illegal encoding. */ \
945 result = __GCONV_ILLEGAL_INPUT; \
946 break; \
949 /* Search the end of this ill-formed UTF-8 character. This \
950 is the next byte with (x & 0xc0) != 0x80. */ \
951 skipped = 0; \
952 do \
954 ++inptr; \
955 ++skipped; \
957 while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
959 continue; \
962 if (__builtin_expect (inptr + cnt > inend, 0)) \
964 /* We don't have enough input. But before we report that check \
965 that all the bytes are correct. */ \
966 for (i = 1; inptr + i < inend; ++i) \
967 if ((inptr[i] & 0xc0) != 0x80) \
968 break; \
970 if (__builtin_expect (inptr + i == inend, 1)) \
972 result = __GCONV_INCOMPLETE_INPUT; \
973 break; \
976 if (ignore_errors_p ()) \
978 /* Ignore it. */ \
979 inptr += i; \
980 ++*irreversible; \
981 continue; \
984 result = __GCONV_ILLEGAL_INPUT; \
985 break; \
988 /* Read the possible remaining bytes. */ \
989 for (i = 1; i < cnt; ++i) \
991 uint32_t byte = inptr[i]; \
993 if ((byte & 0xc0) != 0x80) \
994 /* This is an illegal encoding. */ \
995 break; \
997 ch <<= 6; \
998 ch |= byte & 0x3f; \
1001 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1002 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1003 have been represented with fewer than cnt bytes. */ \
1004 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1006 /* This is an illegal encoding. */ \
1007 if (ignore_errors_p ()) \
1009 inptr += i; \
1010 ++*irreversible; \
1011 continue; \
1014 result = __GCONV_ILLEGAL_INPUT; \
1015 break; \
1018 inptr += cnt; \
1021 /* Now adjust the pointers and store the result. */ \
1022 *((uint32_t *) outptr)++ = ch; \
1024 #define LOOP_NEED_FLAGS
1026 #define STORE_REST \
1028 /* We store the remaining bytes while converting them into the UCS4 \
1029 format. We can assume that the first byte in the buffer is \
1030 correct and that it requires a larger number of bytes than there \
1031 are in the input buffer. */ \
1032 wint_t ch = **inptrp; \
1033 size_t cnt; \
1035 state->__count = inend - *inptrp; \
1037 if (ch >= 0xc2 && ch < 0xe0) \
1039 /* We expect two bytes. The first byte cannot be 0xc0 or \
1040 0xc1, otherwise the wide character could have been \
1041 represented using a single byte. */ \
1042 cnt = 2; \
1043 ch &= 0x1f; \
1045 else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
1047 /* We expect three bytes. */ \
1048 cnt = 3; \
1049 ch &= 0x0f; \
1051 else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
1053 /* We expect four bytes. */ \
1054 cnt = 4; \
1055 ch &= 0x07; \
1057 else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
1059 /* We expect five bytes. */ \
1060 cnt = 5; \
1061 ch &= 0x03; \
1063 else \
1065 /* We expect six bytes. */ \
1066 cnt = 6; \
1067 ch &= 0x01; \
1070 /* The first byte is already consumed. */ \
1071 --cnt; \
1072 while (++(*inptrp) < inend) \
1074 ch <<= 6; \
1075 ch |= **inptrp & 0x3f; \
1076 --cnt; \
1079 /* Shift for the so far missing bytes. */ \
1080 ch <<= cnt * 6; \
1082 /* Store the value. */ \
1083 state->__value.__wch = ch; \
1086 #define UNPACK_BYTES \
1088 wint_t wch = state->__value.__wch; \
1089 size_t ntotal; \
1090 inlen = state->__count; \
1092 if (state->__value.__wch <= 0x7ff) \
1094 bytebuf[0] = 0xc0; \
1095 ntotal = 2; \
1097 else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff) \
1099 bytebuf[0] = 0xe0; \
1100 ntotal = 3; \
1102 else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff) \
1104 bytebuf[0] = 0xf0; \
1105 ntotal = 4; \
1107 else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff) \
1109 bytebuf[0] = 0xf8; \
1110 ntotal = 5; \
1112 else \
1114 bytebuf[0] = 0xfc; \
1115 ntotal = 6; \
1118 do \
1120 if (--ntotal < inlen) \
1121 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1122 wch >>= 6; \
1124 while (ntotal > 1); \
1126 bytebuf[0] |= wch; \
1129 #include <iconv/loop.c>
1130 #include <iconv/skeleton.c>
1133 /* Convert from UCS2 to the internal (UCS4-like) format. */
1134 #define DEFINE_INIT 0
1135 #define DEFINE_FINI 0
1136 #define MIN_NEEDED_FROM 2
1137 #define MIN_NEEDED_TO 4
1138 #define FROM_DIRECTION 1
1139 #define FROM_LOOP ucs2_internal_loop
1140 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1141 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1142 #define ONE_DIRECTION 1
1144 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1145 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1146 #define LOOPFCT FROM_LOOP
1147 #define BODY \
1149 uint16_t u1 = *((uint16_t *) inptr); \
1151 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1153 /* Surrogate characters in UCS-2 input are not valid. Reject \
1154 them. (Catching this here is not security relevant.) */ \
1155 if (! ignore_errors_p ()) \
1157 result = __GCONV_ILLEGAL_INPUT; \
1158 break; \
1160 inptr += 2; \
1161 ++*irreversible; \
1162 continue; \
1165 *((uint32_t *) outptr)++ = u1; \
1166 inptr += 2; \
1168 #define LOOP_NEED_FLAGS
1169 #include <iconv/loop.c>
1170 #include <iconv/skeleton.c>
1173 /* Convert from the internal (UCS4-like) format to UCS2. */
1174 #define DEFINE_INIT 0
1175 #define DEFINE_FINI 0
1176 #define MIN_NEEDED_FROM 4
1177 #define MIN_NEEDED_TO 2
1178 #define FROM_DIRECTION 1
1179 #define FROM_LOOP internal_ucs2_loop
1180 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1181 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1182 #define ONE_DIRECTION 1
1184 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1185 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1186 #define LOOPFCT FROM_LOOP
1187 #define BODY \
1189 uint32_t val = *((uint32_t *) inptr); \
1191 if (__builtin_expect (val, 0) >= 0x10000) \
1193 STANDARD_ERR_HANDLER (4); \
1195 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1197 /* Surrogate characters in UCS-4 input are not valid. \
1198 We must catch this, because the UCS-2 output might be \
1199 interpreted as UTF-16 by other programs. If we let \
1200 surrogates pass through, attackers could make a security \
1201 hole exploit by synthesizing any desired plane 1-16 \
1202 character. */ \
1203 if (! ignore_errors_p ()) \
1205 result = __GCONV_ILLEGAL_INPUT; \
1206 break; \
1208 inptr += 4; \
1209 ++*irreversible; \
1210 continue; \
1212 else \
1214 *((uint16_t *) outptr)++ = val; \
1215 inptr += 4; \
1218 #define LOOP_NEED_FLAGS
1219 #include <iconv/loop.c>
1220 #include <iconv/skeleton.c>
1223 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1224 #define DEFINE_INIT 0
1225 #define DEFINE_FINI 0
1226 #define MIN_NEEDED_FROM 2
1227 #define MIN_NEEDED_TO 4
1228 #define FROM_DIRECTION 1
1229 #define FROM_LOOP ucs2reverse_internal_loop
1230 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1231 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1232 #define ONE_DIRECTION 1
1234 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1235 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1236 #define LOOPFCT FROM_LOOP
1237 #define BODY \
1239 uint16_t u1 = bswap_16 (*((uint16_t *) inptr)); \
1241 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1243 /* Surrogate characters in UCS-2 input are not valid. Reject \
1244 them. (Catching this here is not security relevant.) */ \
1245 if (! ignore_errors_p ()) \
1247 result = __GCONV_ILLEGAL_INPUT; \
1248 break; \
1250 inptr += 2; \
1251 ++*irreversible; \
1252 continue; \
1255 *((uint32_t *) outptr)++ = u1; \
1256 inptr += 2; \
1258 #define LOOP_NEED_FLAGS
1259 #include <iconv/loop.c>
1260 #include <iconv/skeleton.c>
1263 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1264 #define DEFINE_INIT 0
1265 #define DEFINE_FINI 0
1266 #define MIN_NEEDED_FROM 4
1267 #define MIN_NEEDED_TO 2
1268 #define FROM_DIRECTION 1
1269 #define FROM_LOOP internal_ucs2reverse_loop
1270 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1271 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1272 #define ONE_DIRECTION 1
1274 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1275 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1276 #define LOOPFCT FROM_LOOP
1277 #define BODY \
1279 uint32_t val = *((uint32_t *) inptr); \
1280 if (__builtin_expect (val, 0) >= 0x10000) \
1282 STANDARD_ERR_HANDLER (4); \
1284 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1286 /* Surrogate characters in UCS-4 input are not valid. \
1287 We must catch this, because the UCS-2 output might be \
1288 interpreted as UTF-16 by other programs. If we let \
1289 surrogates pass through, attackers could make a security \
1290 hole exploit by synthesizing any desired plane 1-16 \
1291 character. */ \
1292 if (! ignore_errors_p ()) \
1294 result = __GCONV_ILLEGAL_INPUT; \
1295 break; \
1297 inptr += 4; \
1298 ++*irreversible; \
1299 continue; \
1301 else \
1303 *((uint16_t *) outptr)++ = bswap_16 (val); \
1304 inptr += 4; \
1307 #define LOOP_NEED_FLAGS
1308 #include <iconv/loop.c>
1309 #include <iconv/skeleton.c>