* csu/Makefile ($(csu-dummies) target rule): Don't use a temp C file.
[glibc.git] / iconv / gconv_simple.c
blob69489d892bd45bce6a373c3aea9ff5f20accfb9d
1 /* Simple transformations functions.
2 Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
31 #include <gconv_int.h>
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
42 #ifndef EILSEQ
43 # define EILSEQ EINVAL
44 #endif
/* Single-byte to INTERNAL conversion that accepts ASCII only: bytes
   below 0x80 map to themselves, every other byte yields WEOF.  STEP is
   not consulted.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c >= 0x80 ? WEOF : (wint_t) c;
}
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal UCS4 format and the real UCS4
   format is, if any, the endianness.  Unicode/ISO 10646 says that,
   unless some higher-level protocol specifies otherwise, the byte
   order is big endian.  */
64 #define DEFINE_INIT 0
65 #define DEFINE_FINI 0
66 #define MIN_NEEDED_FROM 4
67 #define MIN_NEEDED_TO 4
68 #define FROM_DIRECTION 1
69 #define FROM_LOOP internal_ucs4_loop
70 #define TO_LOOP internal_ucs4_loop /* This is not used. */
71 #define FUNCTION_NAME __gconv_transform_internal_ucs4
/* Convert complete 4-byte units from the internal (host byte order)
   representation to big-endian UCS4.

   At most MIN (input bytes, output bytes) / 4 units are converted and
   *INPTRP / *OUTPTRP are advanced past them.  Returns
   __GCONV_EMPTY_INPUT if the input was consumed up to INEND,
   __GCONV_FULL_OUTPUT if fewer than four bytes remain before OUTEND,
   and __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  The buffers are accessed through
   uint32_t pointers.  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
		    struct __gconv_step_data *step_data,
		    const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, unsigned char *outend,
		    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* The result of a cast is not an lvalue in ISO C, so store
	 through the cast pointer and advance the byte pointer
	 separately.  */
      *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4_loop: converts complete
   4-byte units from host byte order to big-endian UCS4 using only
   single-byte accesses.  Pointer updates and return values follow the
   same rules as internal_ucs4_loop.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
			      struct __gconv_step_data *step_data,
			      const unsigned char **inptrp,
			      const unsigned char *inend,
			      unsigned char **outptrp, unsigned char *outend,
			      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every unit.  */
  for (; units > 0; --units)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* The host order already matches: a plain copy is enough.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  return (*outptrp + 4 > outend
	  ? __GCONV_FULL_OUTPUT : __GCONV_INCOMPLETE_INPUT);
}
#endif
/* Finish a 4-byte unit whose first bytes were saved in the conversion
   state by an earlier call (internal format to big-endian UCS4).

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already buffered in __value.__wchb.  Bytes are taken from
   *INPTRP until four are available.  If the input ends first, the new
   count is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise
   the unit is written to *OUTPTRP in big-endian order, *OUTPTRP is
   advanced by four, the buffered count is cleared and __GCONV_OK is
   returned.  OUTEND is not checked here.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
			   struct __gconv_step_data *step_data,
			   const unsigned char **inptrp,
			   const unsigned char *inend,
			   unsigned char **outptrp, unsigned char *outend,
			   size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Copy byte by byte.  The former word store through a cast of
     OUTPTRP incremented the local parameter instead of *OUTPTRP and
     required an aligned output buffer.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
206 #include <iconv/skeleton.c>
209 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
210 for the other direction we have to check for correct values here. */
211 #define DEFINE_INIT 0
212 #define DEFINE_FINI 0
213 #define MIN_NEEDED_FROM 4
214 #define MIN_NEEDED_TO 4
215 #define FROM_DIRECTION 1
216 #define FROM_LOOP ucs4_internal_loop
217 #define TO_LOOP ucs4_internal_loop /* This is not used. */
218 #define FUNCTION_NAME __gconv_transform_ucs4_internal
/* Convert complete 4-byte units of big-endian UCS4 to the internal
   (host byte order) representation, rejecting values above
   0x7fffffff.

   For an out-of-range value: if IRREVERSIBLE is NULL,
   __GCONV_ILLEGAL_INPUT is returned at once (without writing back the
   pointers); if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
   unit is skipped and *IRREVERSIBLE is incremented; otherwise
   *INPTRP / *OUTPTRP are set to the offending unit and
   __GCONV_ILLEGAL_INPUT is returned.  On normal completion the result
   is __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  The buffers are accessed through
   uint32_t pointers.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
		    struct __gconv_step_data *step_data,
		    const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, unsigned char *outend,
		    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

      /* The result of a cast is not an lvalue in ISO C, so the
	 pointer is advanced in a separate statement.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4_internal_loop: converts complete
   4-byte units of big-endian UCS4 to host byte order using only
   single-byte accesses.

   A unit is out of range when its most significant byte (the first
   input byte) is above 0x7f, i.e. the value exceeds 0x7fffffff.  Such
   units are handled exactly as in ucs4_internal_loop: immediate
   __GCONV_ILLEGAL_INPUT when IRREVERSIBLE is NULL, skipped and counted
   when __GCONV_IGNORE_ERRORS is set, otherwise the pointers are
   written back and __GCONV_ILLEGAL_INPUT is returned.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
			      struct __gconv_step_data *step_data,
			      const unsigned char **inptrp,
			      const unsigned char *inend,
			      unsigned char **outptrp, unsigned char *outend,
			      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* 0x80 in the top byte already means a value of 0x80000000 or
	 more, so the limit is 0x7f, matching the aligned loop.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish a 4-byte big-endian UCS4 unit whose first bytes were saved in
   the conversion state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already buffered.  If the input ends before four bytes are
   available, the count is updated and __GCONV_INCOMPLETE_INPUT is
   returned.  A completed unit whose most significant byte is above
   0x7f (value above 0x7fffffff) is illegal: unless
   __GCONV_IGNORE_ERRORS is set, *INPTRP is moved back over the bytes
   consumed by this call and __GCONV_ILLEGAL_INPUT is returned; with
   the flag set the unit is dropped.  A valid unit is written to
   *OUTPTRP in host byte order.  In both remaining cases the buffered
   count is cleared and __GCONV_OK is returned.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
			   struct __gconv_step_data *step_data,
			   const unsigned char **inptrp,
			   const unsigned char *inend,
			   unsigned char **outptrp, unsigned char *outend,
			   size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 in the top byte already means a value of 0x80000000 or more,
     so the limit is 0x7f.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
			0))
    {
      /* The value is too large.  We don't try transliteration here since
	 this is not an error because of the lack of possibilities to
	 represent the result.  This is a genuine bug in the input since
	 UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
	{
	  /* Give back the bytes taken from the input in this call.  */
	  *inptrp -= cnt - (state->__count & 7);
	  return __GCONV_ILLEGAL_INPUT;
	}
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
414 #include <iconv/skeleton.c>
417 /* Similarly for the little endian form. */
418 #define DEFINE_INIT 0
419 #define DEFINE_FINI 0
420 #define MIN_NEEDED_FROM 4
421 #define MIN_NEEDED_TO 4
422 #define FROM_DIRECTION 1
423 #define FROM_LOOP internal_ucs4le_loop
424 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
425 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
/* Convert complete 4-byte units from the internal (host byte order)
   representation to little-endian UCS4.

   At most MIN (input bytes, output bytes) / 4 units are converted and
   *INPTRP / *OUTPTRP are advanced past them.  Returns
   __GCONV_EMPTY_INPUT if the input was consumed up to INEND,
   __GCONV_FULL_OUTPUT if fewer than four bytes remain before OUTEND,
   and __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
		      struct __gconv_step_data *step_data,
		      const unsigned char **inptrp, const unsigned char *inend,
		      unsigned char **outptrp, unsigned char *outend,
		      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* The result of a cast is not an lvalue in ISO C, so store
	 through the cast pointer and advance the byte pointer
	 separately.  */
      *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4le_loop: converts complete
   4-byte units from host byte order to little-endian UCS4 using only
   single-byte accesses.  Returns __GCONV_EMPTY_INPUT when the input is
   used up, __GCONV_INCOMPLETE_INPUT when fewer than four input bytes
   remain, and __GCONV_FULL_OUTPUT otherwise (asserting that the output
   really has no room for another unit).  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
				struct __gconv_step_data *step_data,
				const unsigned char **inptrp,
				const unsigned char *inend,
				unsigned char **outptrp, unsigned char *outend,
				size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every unit.  */
  for (; units > 0; --units)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* The host order already matches: a plain copy is enough.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
/* Finish a 4-byte unit whose first bytes were saved in the conversion
   state by an earlier call (internal format to little-endian UCS4).

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already buffered in __value.__wchb.  Bytes are taken from
   *INPTRP until four are available.  If the input ends first, the new
   count is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise
   the unit is written to *OUTPTRP in little-endian order, *OUTPTRP is
   advanced by four, the buffered count is cleared and __GCONV_OK is
   returned.  OUTEND is not checked here.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
			     struct __gconv_step_data *step_data,
			     const unsigned char **inptrp,
			     const unsigned char *inend,
			     unsigned char **outptrp, unsigned char *outend,
			     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  /* Copy byte by byte.  The former word store through a cast of
     OUTPTRP incremented the local parameter instead of *OUTPTRP and
     required an aligned output buffer.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
561 #include <iconv/skeleton.c>
564 /* And finally from UCS4-LE to the internal encoding. */
565 #define DEFINE_INIT 0
566 #define DEFINE_FINI 0
567 #define MIN_NEEDED_FROM 4
568 #define MIN_NEEDED_TO 4
569 #define FROM_DIRECTION 1
570 #define FROM_LOOP ucs4le_internal_loop
571 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
572 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
/* Convert complete 4-byte units of little-endian UCS4 to the internal
   (host byte order) representation, rejecting values above
   0x7fffffff.

   For an out-of-range value: if IRREVERSIBLE is NULL,
   __GCONV_ILLEGAL_INPUT is returned at once (without writing back the
   pointers); if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
   unit is skipped and *IRREVERSIBLE is incremented; otherwise
   *INPTRP / *OUTPTRP are set to the offending unit and
   __GCONV_ILLEGAL_INPUT is returned.  On normal completion the result
   is __GCONV_EMPTY_INPUT when the input is used up,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain,
   and __GCONV_FULL_OUTPUT otherwise.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
		      struct __gconv_step_data *step_data,
		      const unsigned char **inptrp, const unsigned char *inend,
		      unsigned char **outptrp, unsigned char *outend,
		      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  /* Report how far we got, as the big-endian variant does;
	     otherwise the units converted so far would be lost.  */
	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

      /* The result of a cast is not an lvalue in ISO C, so the
	 pointer is advanced in a separate statement.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4le_internal_loop: converts complete
   4-byte units of little-endian UCS4 to host byte order using only
   single-byte accesses.

   A unit is out of range when its most significant byte (the fourth
   input byte) is above 0x7f, i.e. the value exceeds 0x7fffffff.  Such
   units cause an immediate __GCONV_ILLEGAL_INPUT when IRREVERSIBLE is
   NULL, are skipped and counted when __GCONV_IGNORE_ERRORS is set, and
   otherwise make the function write back the pointers and return
   __GCONV_ILLEGAL_INPUT.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
				struct __gconv_step_data *step_data,
				const unsigned char **inptrp,
				const unsigned char *inend,
				unsigned char **outptrp, unsigned char *outend,
				size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* 0x80 in the top byte already means a value of 0x80000000 or
	 more, so the limit is 0x7f, matching the aligned loop.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
/* Finish a 4-byte little-endian UCS4 unit whose first bytes were saved
   in the conversion state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already buffered.  If the input ends before four bytes are
   available, the count is updated and __GCONV_INCOMPLETE_INPUT is
   returned.  A completed unit whose most significant byte (the fourth
   one) is above 0x7f, i.e. a value above 0x7fffffff, is illegal:
   unless __GCONV_IGNORE_ERRORS is set __GCONV_ILLEGAL_INPUT is
   returned; with the flag set the unit is dropped.  A valid unit is
   written to *OUTPTRP in host byte order.  In both remaining cases the
   buffered count is cleared and __GCONV_OK is returned.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
			     struct __gconv_step_data *step_data,
			     const unsigned char **inptrp,
			     const unsigned char *inend,
			     unsigned char **outptrp, unsigned char *outend,
			     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 in the top byte already means a value of 0x80000000 or more,
     so the limit is 0x7f.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
			0))
    {
      /* The value is too large.  We don't try transliteration here since
	 this is not an error because of the lack of possibilities to
	 represent the result.  This is a genuine bug in the input since
	 UCS4 does not allow such values.  */
      /* NOTE(review): ucs4_internal_loop_single rewinds *INPTRP before
	 this return; here the consumed bytes are not given back --
	 confirm whether that difference is intended.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
	return __GCONV_ILLEGAL_INPUT;
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
770 #include <iconv/skeleton.c>
773 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
774 #define DEFINE_INIT 0
775 #define DEFINE_FINI 0
776 #define MIN_NEEDED_FROM 1
777 #define MIN_NEEDED_TO 4
778 #define FROM_DIRECTION 1
779 #define FROM_LOOP ascii_internal_loop
780 #define TO_LOOP ascii_internal_loop /* This is not used. */
781 #define FUNCTION_NAME __gconv_transform_ascii_internal
782 #define ONE_DIRECTION 1
784 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
785 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
786 #define LOOPFCT FROM_LOOP
/* Loop body for ASCII to internal: a byte above 0x7f is passed to the
   standard error handler, any other byte is stored as one 32-bit unit.
   The store and the pointer advance are separate statements because
   the result of a cast is not an lvalue in ISO C.  */
#define BODY \
  { \
    if (__builtin_expect (*inptr > '\x7f', 0)) \
      { \
	/* The value is too large.  We don't try transliteration here since \
	   this is not an error because of the lack of possibilities to \
	   represent the result.  This is a genuine bug in the input since \
	   ASCII does not allow such values.  */ \
	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    else \
      { \
	/* It's an one byte sequence.  */ \
	*((uint32_t *) outptr) = *inptr++; \
	outptr += sizeof (uint32_t); \
      } \
  }
801 #define LOOP_NEED_FLAGS
802 #include <iconv/loop.c>
803 #include <iconv/skeleton.c>
806 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
807 #define DEFINE_INIT 0
808 #define DEFINE_FINI 0
809 #define MIN_NEEDED_FROM 4
810 #define MIN_NEEDED_TO 1
811 #define FROM_DIRECTION 1
812 #define FROM_LOOP internal_ascii_loop
813 #define TO_LOOP internal_ascii_loop /* This is not used. */
814 #define FUNCTION_NAME __gconv_transform_internal_ascii
815 #define ONE_DIRECTION 1
817 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
818 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
819 #define LOOPFCT FROM_LOOP
/* Loop body for internal to ASCII: a value above 0x7f goes through the
   Unicode tag handler and then the standard error handler, any other
   value is written as a single byte.  The load and the pointer advance
   are separate statements because the result of a cast is not an
   lvalue in ISO C.  */
#define BODY \
  { \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
      { \
	UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else \
      { \
	/* It's an one byte sequence.  */ \
	*outptr++ = *((const uint32_t *) inptr); \
	inptr += sizeof (uint32_t); \
      } \
  }
831 #define LOOP_NEED_FLAGS
832 #include <iconv/loop.c>
833 #include <iconv/skeleton.c>
836 /* Convert from the internal (UCS4-like) format to UTF-8. */
837 #define DEFINE_INIT 0
838 #define DEFINE_FINI 0
839 #define MIN_NEEDED_FROM 4
840 #define MIN_NEEDED_TO 1
841 #define MAX_NEEDED_TO 6
842 #define FROM_DIRECTION 1
843 #define FROM_LOOP internal_utf8_loop
844 #define TO_LOOP internal_utf8_loop /* This is not used. */
845 #define FUNCTION_NAME __gconv_transform_internal_utf8
846 #define ONE_DIRECTION 1
848 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
849 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
850 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
851 #define LOOPFCT FROM_LOOP
/* Loop body for internal to UTF-8.  Values below 0x80 are written as
   one byte; values up to 0x7fffffff are written as a 2..6 byte
   sequence (lead byte followed by continuation bytes carrying six bits
   each); larger values go to the standard error handler.  If the
   sequence does not fit before OUTEND, RESULT is set to
   __GCONV_FULL_OUTPUT and the loop is left.  */
#define BODY \
  { \
    uint32_t wc = *((const uint32_t *) inptr); \
    \
    if (wc < 0x80) \
      /* It's an one byte sequence.  */ \
      *outptr++ = (unsigned char) wc; \
    else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
      { \
	size_t step; \
	/* Same type as OUTPTR; a plain char pointer is not compatible.  */ \
	unsigned char *start; \
	\
	/* Find the sequence length: STEP bytes hold 5 * STEP + 1 bits.  */ \
	for (step = 2; step < 6; ++step) \
	  if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
	    break; \
	\
	if (__builtin_expect (outptr + step > outend, 0)) \
	  { \
	    /* Too long.  */ \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	\
	start = outptr; \
	/* Lead byte marker: STEP one bits followed by a zero bit.  The \
	   positive constant avoids right-shifting a negative value.  */ \
	*outptr = (unsigned char) (0xff00 >> step); \
	outptr += step; \
	--step; \
	do \
	  { \
	    start[step] = 0x80 | (wc & 0x3f); \
	    wc >>= 6; \
	  } \
	while (--step > 0); \
	start[0] |= wc; \
      } \
    else \
      { \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    \
    inptr += 4; \
  }
894 #define LOOP_NEED_FLAGS
895 #include <iconv/loop.c>
896 #include <iconv/skeleton.c>
899 /* Convert from UTF-8 to the internal (UCS4-like) format. */
900 #define DEFINE_INIT 0
901 #define DEFINE_FINI 0
902 #define MIN_NEEDED_FROM 1
903 #define MAX_NEEDED_FROM 6
904 #define MIN_NEEDED_TO 4
905 #define FROM_DIRECTION 1
906 #define FROM_LOOP utf8_internal_loop
907 #define TO_LOOP utf8_internal_loop /* This is not used. */
908 #define FUNCTION_NAME __gconv_transform_utf8_internal
909 #define ONE_DIRECTION 1
911 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
912 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
913 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
914 #define LOOPFCT FROM_LOOP
/* Loop body for UTF-8 to internal.  Decodes one sequence of 1..6 bytes
   starting at INPTR and stores the value as one 32-bit unit.  Invalid
   lead bytes, invalid continuation bytes and over-long encodings go to
   the standard error handler; a sequence cut off by INEND sets RESULT
   to __GCONV_INCOMPLETE_INPUT and leaves the loop.  The final store
   and the pointer advance are separate statements because the result
   of a cast is not an lvalue in ISO C.  */
#define BODY \
  { \
    uint32_t ch; \
    uint_fast32_t cnt; \
    uint_fast32_t i; \
    \
    /* Next input byte.  */ \
    ch = *inptr; \
    \
    if (ch < 0x80) \
      { \
	/* One byte sequence.  */ \
	cnt = 1; \
	++inptr; \
      } \
    else \
      { \
	if (ch >= 0xc2 && ch < 0xe0) \
	  { \
	    /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1, \
	       otherwise the wide character could have been represented \
	       using a single byte.  */ \
	    cnt = 2; \
	    ch &= 0x1f; \
	  } \
	else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
	  { \
	    /* We expect three bytes.  */ \
	    cnt = 3; \
	    ch &= 0x0f; \
	  } \
	else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
	  { \
	    /* We expect four bytes.  */ \
	    cnt = 4; \
	    ch &= 0x07; \
	  } \
	else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
	  { \
	    /* We expect five bytes.  */ \
	    cnt = 5; \
	    ch &= 0x03; \
	  } \
	else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
	  { \
	    /* We expect six bytes.  */ \
	    cnt = 6; \
	    ch &= 0x01; \
	  } \
	else \
	  { \
	    int skipped; \
	    \
	    /* Search the end of this ill-formed UTF-8 character.  This \
	       is the next byte with (x & 0xc0) != 0x80.  */ \
	    skipped = 0; \
	    do \
	      ++skipped; \
	    while (inptr + skipped < inend \
		   && (*(inptr + skipped) & 0xc0) == 0x80 \
		   && skipped < 5); \
	    \
	    STANDARD_FROM_LOOP_ERR_HANDLER (skipped); \
	  } \
	\
	if (__builtin_expect (inptr + cnt > inend, 0)) \
	  { \
	    /* We don't have enough input.  But before we report that check \
	       that all the bytes are correct.  */ \
	    for (i = 1; inptr + i < inend; ++i) \
	      if ((inptr[i] & 0xc0) != 0x80) \
		break; \
	    \
	    if (__builtin_expect (inptr + i == inend, 1)) \
	      { \
		result = __GCONV_INCOMPLETE_INPUT; \
		break; \
	      } \
	    \
	    STANDARD_FROM_LOOP_ERR_HANDLER (i); \
	  } \
	\
	/* Read the possible remaining bytes.  */ \
	for (i = 1; i < cnt; ++i) \
	  { \
	    uint32_t byte = inptr[i]; \
	    \
	    if ((byte & 0xc0) != 0x80) \
	      /* This is an illegal encoding.  */ \
	      break; \
	    \
	    ch <<= 6; \
	    ch |= byte & 0x3f; \
	  } \
	\
	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
	   have been represented with fewer than cnt bytes.  */ \
	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
	  { \
	    /* This is an illegal encoding.  */ \
	    STANDARD_FROM_LOOP_ERR_HANDLER (i); \
	  } \
	\
	inptr += cnt; \
      } \
    \
    /* Now adjust the pointers and store the result.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
1025 #define LOOP_NEED_FLAGS
/* Save an incomplete UTF-8 sequence in the conversion state.  The
   bytes between *INPTRP and INEND are folded into a partial UCS4 value
   that is shifted as if the missing continuation bytes were zero.
   state->__count receives the number of bytes seen in its low byte and
   the total sequence length in the bits above; *INPTRP ends up at
   INEND.  */
#define STORE_REST \
  { \
    /* The lead byte is taken to be valid and to announce more bytes \
       than the input buffer still holds.  */ \
    wint_t value = **inptrp; \
    size_t seqlen; \
    size_t missing; \
    \
    state->__count = inend - *inptrp; \
    \
    if (value >= 0xc2 && value < 0xe0) \
      { \
	/* Two bytes.  0xc0 and 0xc1 cannot start a sequence because \
	   the character would have fit into a single byte.  */ \
	seqlen = 2; \
	value &= 0x1f; \
      } \
    else if (__builtin_expect ((value & 0xf0) == 0xe0, 1)) \
      { \
	/* Three bytes.  */ \
	seqlen = 3; \
	value &= 0x0f; \
      } \
    else if (__builtin_expect ((value & 0xf8) == 0xf0, 1)) \
      { \
	/* Four bytes.  */ \
	seqlen = 4; \
	value &= 0x07; \
      } \
    else if (__builtin_expect ((value & 0xfc) == 0xf8, 1)) \
      { \
	/* Five bytes.  */ \
	seqlen = 5; \
	value &= 0x03; \
      } \
    else \
      { \
	/* Six bytes.  */ \
	seqlen = 6; \
	value &= 0x01; \
      } \
    \
    /* The lead byte has been accounted for; fold in what follows.  */ \
    missing = seqlen - 1; \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp)) \
      { \
	value <<= 6; \
	value |= **inptrp & 0x3f; \
	--missing; \
      } \
    \
    /* Make room for the continuation bytes still to come.  */ \
    value <<= missing * 6; \
    \
    /* Remember the full sequence length and the partial value.  */ \
    state->__count |= seqlen << 8; \
    state->__value.__wch = value; \
  }
/* Rebuild in BYTEBUF the bytes of the incomplete UTF-8 sequence saved
   in the conversion state.  INLEN is set to the number of bytes that
   had been seen (low byte of state->__count); the total sequence
   length comes from the bits above.  Only positions below INLEN are
   written.  */
#define UNPACK_BYTES \
  { \
    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t rest = state->__value.__wch; \
    size_t pos = state->__count >> 8; \
    \
    inlen = state->__count & 255; \
    \
    /* Lead byte marker for a sequence of POS bytes.  */ \
    bytebuf[0] = inmask[pos - 2]; \
    \
    /* Peel off six bits per continuation byte, last byte first.  */ \
    do \
      { \
	--pos; \
	if (pos < inlen) \
	  bytebuf[pos] = 0x80 | (rest & 0x3f); \
	rest >>= 6; \
      } \
    while (pos > 1); \
    \
    /* What is left are the payload bits of the lead byte.  */ \
    bytebuf[0] |= rest; \
  }
1111 #define CLEAR_STATE \
1112 state->__count = 0
1115 #include <iconv/loop.c>
1116 #include <iconv/skeleton.c>
1119 /* Convert from UCS2 to the internal (UCS4-like) format. */
1120 #define DEFINE_INIT 0
1121 #define DEFINE_FINI 0
1122 #define MIN_NEEDED_FROM 2
1123 #define MIN_NEEDED_TO 4
1124 #define FROM_DIRECTION 1
1125 #define FROM_LOOP ucs2_internal_loop
1126 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1127 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1128 #define ONE_DIRECTION 1
1130 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1131 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1132 #define LOOPFCT FROM_LOOP
/* Loop body for UCS2 to internal: reads one 16-bit unit, passes
   surrogates (0xd800..0xdfff) to the standard error handler and stores
   every other value as one 32-bit unit.  The store and the pointer
   advance are separate statements because the result of a cast is not
   an lvalue in ISO C.  */
#define BODY \
  { \
    uint16_t u1 = *((const uint16_t *) inptr); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
	/* Surrogate characters in UCS-2 input are not valid.  Reject \
	   them.  (Catching this here is not security relevant.)  */ \
	STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
1147 #define LOOP_NEED_FLAGS
1148 #include <iconv/loop.c>
1149 #include <iconv/skeleton.c>
1152 /* Convert from the internal (UCS4-like) format to UCS2. */
1153 #define DEFINE_INIT 0
1154 #define DEFINE_FINI 0
1155 #define MIN_NEEDED_FROM 4
1156 #define MIN_NEEDED_TO 2
1157 #define FROM_DIRECTION 1
1158 #define FROM_LOOP internal_ucs2_loop
1159 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1160 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1161 #define ONE_DIRECTION 1
1163 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1164 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1165 #define LOOPFCT FROM_LOOP
/* Loop body for internal to UCS2.  Values of 0x10000 and above go
   through the Unicode tag handler and then the standard error handler.
   Surrogates (0xd800..0xdfff) set RESULT to __GCONV_ILLEGAL_INPUT and
   either leave the loop or, when errors are ignored, are skipped and
   counted in *IRREVERSIBLE.  Every other value is stored as one 16-bit
   unit.  The store and the pointer advance are separate statements
   because the result of a cast is not an lvalue in ISO C.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
	UNICODE_TAG_HANDLER (val, 4); \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
	/* Surrogate characters in UCS-4 input are not valid. \
	   We must catch this, because the UCS-2 output might be \
	   interpreted as UTF-16 by other programs.  If we let \
	   surrogates pass through, attackers could make a security \
	   hole exploit by synthesizing any desired plane 1-16 \
	   character.  */ \
	result = __GCONV_ILLEGAL_INPUT; \
	if (! ignore_errors_p ()) \
	  break; \
	inptr += 4; \
	++*irreversible; \
	continue; \
      } \
    else \
      { \
	*((uint16_t *) outptr) = val; \
	outptr += sizeof (uint16_t); \
	inptr += 4; \
      } \
  }
1196 #define LOOP_NEED_FLAGS
1197 #include <iconv/loop.c>
1198 #include <iconv/skeleton.c>
1201 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1202 #define DEFINE_INIT 0
1203 #define DEFINE_FINI 0
1204 #define MIN_NEEDED_FROM 2
1205 #define MIN_NEEDED_TO 4
1206 #define FROM_DIRECTION 1
1207 #define FROM_LOOP ucs2reverse_internal_loop
1208 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1209 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1210 #define ONE_DIRECTION 1
1212 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1213 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1214 #define LOOPFCT FROM_LOOP
/* Loop body for byte-swapped UCS2 to internal: reads one 16-bit unit
   and swaps its bytes.  Surrogates (0xd800..0xdfff) either set RESULT
   to __GCONV_ILLEGAL_INPUT and leave the loop or, when errors are
   ignored, are skipped and counted in *IRREVERSIBLE.  Every other
   value is stored as one 32-bit unit.  The store and the pointer
   advance are separate statements because the result of a cast is not
   an lvalue in ISO C.  */
#define BODY \
  { \
    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
	/* Surrogate characters in UCS-2 input are not valid.  Reject \
	   them.  (Catching this here is not security relevant.)  */ \
	if (! ignore_errors_p ()) \
	  { \
	    result = __GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
	inptr += 2; \
	++*irreversible; \
	continue; \
      } \
    \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
1236 #define LOOP_NEED_FLAGS
1237 #include <iconv/loop.c>
1238 #include <iconv/skeleton.c>
1241 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1242 #define DEFINE_INIT 0
1243 #define DEFINE_FINI 0
1244 #define MIN_NEEDED_FROM 4
1245 #define MIN_NEEDED_TO 2
1246 #define FROM_DIRECTION 1
1247 #define FROM_LOOP internal_ucs2reverse_loop
1248 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1249 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1250 #define ONE_DIRECTION 1
1252 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1253 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1254 #define LOOPFCT FROM_LOOP
/* Loop body for internal to byte-swapped UCS2.  Values of 0x10000 and
   above go through the Unicode tag handler and then the standard error
   handler.  Surrogates (0xd800..0xdfff) either set RESULT to
   __GCONV_ILLEGAL_INPUT and leave the loop or, when errors are
   ignored, are skipped and counted in *IRREVERSIBLE.  Every other
   value is stored as one byte-swapped 16-bit unit.  The store and the
   pointer advance are separate statements because the result of a cast
   is not an lvalue in ISO C.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
	UNICODE_TAG_HANDLER (val, 4); \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
	/* Surrogate characters in UCS-4 input are not valid. \
	   We must catch this, because the UCS-2 output might be \
	   interpreted as UTF-16 by other programs.  If we let \
	   surrogates pass through, attackers could make a security \
	   hole exploit by synthesizing any desired plane 1-16 \
	   character.  */ \
	if (! ignore_errors_p ()) \
	  { \
	    result = __GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
	inptr += 4; \
	++*irreversible; \
	continue; \
      } \
    else \
      { \
	*((uint16_t *) outptr) = bswap_16 (val); \
	outptr += sizeof (uint16_t); \
	inptr += 4; \
      } \
  }
1286 #define LOOP_NEED_FLAGS
1287 #include <iconv/loop.c>
1288 #include <iconv/skeleton.c>