[BZ #697]
[glibc.git] / iconv / gconv_simple.c
blob5cf3237abb964704f9b12d3806b1b14f8cf59c62
/* Simple transformation functions.
   Copyright (C) 1997-2005, 2007, 2008 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
#include <assert.h>
#include <byteswap.h>
#include <dlfcn.h>
#include <endian.h>
#include <errno.h>
#include <gconv.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <sys/param.h>
#include <gconv_int.h>
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
42 #ifndef EILSEQ
43 # define EILSEQ EINVAL
44 #endif
/* Specialized conversion function for a single byte to INTERNAL,
   recognizing only ASCII characters.  Returns C itself when it is
   below 0x80 and WEOF otherwise.  STEP is not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  if (c < 0x80)
    return c;
  else
    return WEOF;
}
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
/* Convert whole 4-byte units from the internal (host byte order) form
   at *INPTRP into big-endian UCS4 at *OUTPTRP.  The number of units is
   limited by both the input up to INEND and the output up to OUTEND;
   both pointers are advanced past what was converted.  STEP, STEP_DATA
   and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when the input was consumed completely,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;
  uint32_t *outptr32 = (uint32_t *) outptr;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    *outptr32++ = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = (unsigned char *) outptr32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Same conversion as internal_ucs4_loop (internal form to big-endian
   UCS4), but the little-endian path moves single bytes instead of
   accessing 32-bit words, so the buffers need not be aligned.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when the input was consumed completely,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one 4-byte unit whose leading bytes are buffered in the
   conversion state.  The low three bits of STATE->__count hold the
   number of bytes already collected in STATE->__value.__wchb; more
   bytes are taken from *INPTRP until four are available.

   If the input ends first, the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the unit is written
   to *OUTPTRP in big-endian order, *OUTPTRP is advanced by four, the
   count bits are cleared and __GCONV_OK is returned.  STEP, OUTEND and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

#elif __BYTE_ORDER == __BIG_ENDIAN
  /* XXX unaligned */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
213 #include <iconv/skeleton.c>
216 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
217 for the other direction we have to check for correct values here. */
218 #define DEFINE_INIT 0
219 #define DEFINE_FINI 0
220 #define MIN_NEEDED_FROM 4
221 #define MIN_NEEDED_TO 4
222 #define FROM_DIRECTION 1
223 #define FROM_LOOP ucs4_internal_loop
224 #define TO_LOOP ucs4_internal_loop /* This is not used. */
225 #define FUNCTION_NAME __gconv_transform_ucs4_internal
/* Convert big-endian UCS4 units at *INPTRP into the internal (host
   byte order) form at *OUTPTRP, rejecting values above 0x7fffffff.

   For an illegal value: if IRREVERSIBLE is NULL the function returns
   __GCONV_ILLEGAL_INPUT at once; if __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags the unit is skipped and *IRREVERSIBLE is
   incremented; otherwise the pointers are stored, pointing at the
   offending unit, and __GCONV_ILLEGAL_INPUT is returned.

   The loop runs while a whole unit of input and of output space
   remains.  A fixed iteration count computed up front is wrong here
   because skipped units use no output: the loop would stop early and
   the status below would report incomplete input although complete
   units were still pending.

   Returns __GCONV_EMPTY_INPUT when the input was consumed completely,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4_internal_loop for buffers that need not be
   aligned: converts big-endian UCS4 units at *INPTRP into the internal
   (host byte order) form at *OUTPTRP.

   A unit is illegal when its most significant byte, inptr[0], exceeds
   0x7f, i.e. when the value is above 0x7fffffff.  This is the same
   limit the word-wise ucs4_internal_loop applies; testing for > 0x80
   would let 0x80xxxxxx through.  Illegal units are handled as in
   ucs4_internal_loop: immediate __GCONV_ILLEGAL_INPUT when
   IRREVERSIBLE is NULL, skipped and counted when
   __GCONV_IGNORE_ERRORS is set, otherwise reported with the pointers
   stored at the offending unit.

   The loop runs while a whole unit of input and of output space
   remains, so skipped units do not end the conversion early.  STEP is
   not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one big-endian UCS4 unit whose leading bytes are buffered in
   the conversion state.  The low three bits of STATE->__count hold the
   number of bytes already collected in STATE->__value.__wchb; more
   bytes are taken from *INPTRP until four are available.

   If the input ends first, the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  A unit whose most significant
   byte exceeds 0x7f (value above 0x7fffffff, the limit used by
   ucs4_internal_loop) is illegal: unless __GCONV_IGNORE_ERRORS is set,
   the bytes read during this call are handed back by rewinding
   *INPTRP and __GCONV_ILLEGAL_INPUT is returned; with the flag set the
   unit is dropped.  A legal unit is stored at *OUTPTRP in host byte
   order.  In both remaining cases the count bits are cleared and
   __GCONV_OK is returned.  STEP, OUTEND and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* state->__count still holds the count from before this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
425 #include <iconv/skeleton.c>
428 /* Similarly for the little endian form. */
429 #define DEFINE_INIT 0
430 #define DEFINE_FINI 0
431 #define MIN_NEEDED_FROM 4
432 #define MIN_NEEDED_TO 4
433 #define FROM_DIRECTION 1
434 #define FROM_LOOP internal_ucs4le_loop
435 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
436 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
439 static inline int
440 __attribute ((always_inline))
441 internal_ucs4le_loop (struct __gconv_step *step,
442 struct __gconv_step_data *step_data,
443 const unsigned char **inptrp, const unsigned char *inend,
444 unsigned char **outptrp, unsigned char *outend,
445 size_t *irreversible)
447 const unsigned char *inptr = *inptrp;
448 unsigned char *outptr = *outptrp;
449 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
450 int result;
452 #if __BYTE_ORDER == __BIG_ENDIAN
453 /* Sigh, we have to do some real work. */
454 size_t cnt;
455 uint32_t *outptr32 = (uint32_t *) outptr;
457 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
458 *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
459 outptr = (unsigned char *) outptr32;
461 *inptrp = inptr;
462 *outptrp = outptr;
463 #elif __BYTE_ORDER == __LITTLE_ENDIAN
464 /* Simply copy the data. */
465 *inptrp = inptr + n_convert * 4;
466 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
467 #else
468 # error "This endianess is not supported."
469 #endif
471 /* Determine the status. */
472 if (*inptrp == inend)
473 result = __GCONV_EMPTY_INPUT;
474 else if (*outptrp + 4 > outend)
475 result = __GCONV_FULL_OUTPUT;
476 else
477 result = __GCONV_INCOMPLETE_INPUT;
479 return result;
482 #ifndef _STRING_ARCH_unaligned
483 static inline int
484 __attribute ((always_inline))
485 internal_ucs4le_loop_unaligned (struct __gconv_step *step,
486 struct __gconv_step_data *step_data,
487 const unsigned char **inptrp,
488 const unsigned char *inend,
489 unsigned char **outptrp, unsigned char *outend,
490 size_t *irreversible)
492 const unsigned char *inptr = *inptrp;
493 unsigned char *outptr = *outptrp;
494 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
495 int result;
497 # if __BYTE_ORDER == __BIG_ENDIAN
498 /* Sigh, we have to do some real work. */
499 size_t cnt;
501 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
503 outptr[0] = inptr[3];
504 outptr[1] = inptr[2];
505 outptr[2] = inptr[1];
506 outptr[3] = inptr[0];
509 *inptrp = inptr;
510 *outptrp = outptr;
511 # elif __BYTE_ORDER == __LITTLE_ENDIAN
512 /* Simply copy the data. */
513 *inptrp = inptr + n_convert * 4;
514 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
515 # else
516 # error "This endianess is not supported."
517 # endif
519 /* Determine the status. */
520 if (*inptrp == inend)
521 result = __GCONV_EMPTY_INPUT;
522 else if (*inptrp + 4 > inend)
523 result = __GCONV_INCOMPLETE_INPUT;
524 else
526 assert (*outptrp + 4 > outend);
527 result = __GCONV_FULL_OUTPUT;
530 return result;
532 #endif
/* Finish one 4-byte unit whose leading bytes are buffered in the
   conversion state.  The low three bits of STATE->__count hold the
   number of bytes already collected in STATE->__value.__wchb; more
   bytes are taken from *INPTRP until four are available.

   If the input ends first, the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the unit is written
   to *OUTPTRP in little-endian order, *OUTPTRP is advanced by four,
   the count bits are cleared and __GCONV_OK is returned.  STEP, OUTEND
   and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

#else
  /* XXX unaligned */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];

#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
582 #include <iconv/skeleton.c>
585 /* And finally from UCS4-LE to the internal encoding. */
586 #define DEFINE_INIT 0
587 #define DEFINE_FINI 0
588 #define MIN_NEEDED_FROM 4
589 #define MIN_NEEDED_TO 4
590 #define FROM_DIRECTION 1
591 #define FROM_LOOP ucs4le_internal_loop
592 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
593 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
/* Convert little-endian UCS4 units at *INPTRP into the internal (host
   byte order) form at *OUTPTRP, rejecting values above 0x7fffffff.

   For an illegal value: if IRREVERSIBLE is NULL the function returns
   __GCONV_ILLEGAL_INPUT at once; if __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags the unit is skipped and *IRREVERSIBLE is
   incremented; otherwise the pointers are stored, pointing at the
   offending unit, and __GCONV_ILLEGAL_INPUT is returned.  The store
   matches ucs4_internal_loop, so the units converted before the error
   are reported to the caller.

   The loop runs while a whole unit of input and of output space
   remains.  A fixed iteration count computed up front is wrong here
   because skipped units use no output: the loop would stop with both
   input and output space left, and the assertion in the status code
   below would fail.

   Returns __GCONV_EMPTY_INPUT when the input was consumed completely,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain,
   and __GCONV_FULL_OUTPUT otherwise.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4le_internal_loop for buffers that need not
   be aligned: converts little-endian UCS4 units at *INPTRP into the
   internal (host byte order) form at *OUTPTRP.

   A unit is illegal when its most significant byte, inptr[3], exceeds
   0x7f, i.e. when the value is above 0x7fffffff.  This is the same
   limit the word-wise ucs4le_internal_loop applies; testing for > 0x80
   would let 0x80xxxxxx through.  Illegal units cause an immediate
   __GCONV_ILLEGAL_INPUT when IRREVERSIBLE is NULL, are skipped and
   counted when __GCONV_IGNORE_ERRORS is set, and are otherwise
   reported with the pointers stored at the offending unit.

   The loop runs while a whole unit of input and of output space
   remains, so skipped units neither end the conversion early nor trip
   the assertion in the status code.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
/* Finish one little-endian UCS4 unit whose leading bytes are buffered
   in the conversion state.  The low three bits of STATE->__count hold
   the number of bytes already collected in STATE->__value.__wchb;
   more bytes are taken from *INPTRP until four are available.

   If the input ends first, the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  A unit whose most significant
   byte (index 3) exceeds 0x7f (value above 0x7fffffff, the limit used
   by ucs4le_internal_loop) is illegal: unless __GCONV_IGNORE_ERRORS is
   set, the bytes read during this call are handed back by rewinding
   *INPTRP, as ucs4_internal_loop_single does, and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the unit is
   dropped.  A legal unit is stored at *OUTPTRP in host byte order.
   In both remaining cases the count bits are cleared and __GCONV_OK
   is returned.  STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* state->__count still holds the count from before this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
795 #include <iconv/skeleton.c>
798 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
799 #define DEFINE_INIT 0
800 #define DEFINE_FINI 0
801 #define MIN_NEEDED_FROM 1
802 #define MIN_NEEDED_TO 4
803 #define FROM_DIRECTION 1
804 #define FROM_LOOP ascii_internal_loop
805 #define TO_LOOP ascii_internal_loop /* This is not used. */
806 #define FUNCTION_NAME __gconv_transform_ascii_internal
807 #define ONE_DIRECTION 1
809 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
810 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
811 #define LOOPFCT FROM_LOOP
812 #define BODY \
814 if (__builtin_expect (*inptr > '\x7f', 0)) \
816 /* The value is too large. We don't try transliteration here since \
817 this is not an error because of the lack of possibilities to \
818 represent the result. This is a genuine bug in the input since \
819 ASCII does not allow such values. */ \
820 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
822 else \
824 /* It's an one byte sequence. */ \
825 *((uint32_t *) outptr) = *inptr++; \
826 outptr += sizeof (uint32_t); \
829 #define LOOP_NEED_FLAGS
830 #include <iconv/loop.c>
831 #include <iconv/skeleton.c>
834 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
835 #define DEFINE_INIT 0
836 #define DEFINE_FINI 0
837 #define MIN_NEEDED_FROM 4
838 #define MIN_NEEDED_TO 1
839 #define FROM_DIRECTION 1
840 #define FROM_LOOP internal_ascii_loop
841 #define TO_LOOP internal_ascii_loop /* This is not used. */
842 #define FUNCTION_NAME __gconv_transform_internal_ascii
843 #define ONE_DIRECTION 1
845 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
846 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
847 #define LOOPFCT FROM_LOOP
848 #define BODY \
850 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
852 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
853 STANDARD_TO_LOOP_ERR_HANDLER (4); \
855 else \
857 /* It's an one byte sequence. */ \
858 *outptr++ = *((const uint32_t *) inptr); \
859 inptr += sizeof (uint32_t); \
862 #define LOOP_NEED_FLAGS
863 #include <iconv/loop.c>
864 #include <iconv/skeleton.c>
867 /* Convert from the internal (UCS4-like) format to UTF-8. */
868 #define DEFINE_INIT 0
869 #define DEFINE_FINI 0
870 #define MIN_NEEDED_FROM 4
871 #define MIN_NEEDED_TO 1
872 #define MAX_NEEDED_TO 6
873 #define FROM_DIRECTION 1
874 #define FROM_LOOP internal_utf8_loop
875 #define TO_LOOP internal_utf8_loop /* This is not used. */
876 #define FUNCTION_NAME __gconv_transform_internal_utf8
877 #define ONE_DIRECTION 1
879 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
880 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
881 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
882 #define LOOPFCT FROM_LOOP
883 #define BODY \
885 uint32_t wc = *((const uint32_t *) inptr); \
887 if (__builtin_expect (wc < 0x80, 1)) \
888 /* It's an one byte sequence. */ \
889 *outptr++ = (unsigned char) wc; \
890 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
892 size_t step; \
893 unsigned char *start; \
895 for (step = 2; step < 6; ++step) \
896 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
897 break; \
899 if (__builtin_expect (outptr + step > outend, 0)) \
901 /* Too long. */ \
902 result = __GCONV_FULL_OUTPUT; \
903 break; \
906 start = outptr; \
907 *outptr = (unsigned char) (~0xff >> step); \
908 outptr += step; \
909 do \
911 start[--step] = 0x80 | (wc & 0x3f); \
912 wc >>= 6; \
914 while (step > 1); \
915 start[0] |= wc; \
917 else \
919 STANDARD_TO_LOOP_ERR_HANDLER (4); \
922 inptr += 4; \
924 #define LOOP_NEED_FLAGS
925 #include <iconv/loop.c>
926 #include <iconv/skeleton.c>
929 /* Convert from UTF-8 to the internal (UCS4-like) format. */
930 #define DEFINE_INIT 0
931 #define DEFINE_FINI 0
932 #define MIN_NEEDED_FROM 1
933 #define MAX_NEEDED_FROM 6
934 #define MIN_NEEDED_TO 4
935 #define FROM_DIRECTION 1
936 #define FROM_LOOP utf8_internal_loop
937 #define TO_LOOP utf8_internal_loop /* This is not used. */
938 #define FUNCTION_NAME __gconv_transform_utf8_internal
939 #define ONE_DIRECTION 1
941 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
942 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
943 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
944 #define LOOPFCT FROM_LOOP
945 #define BODY \
947 /* Next input byte. */ \
948 uint32_t ch = *inptr; \
950 if (__builtin_expect (ch < 0x80, 1)) \
952 /* One byte sequence. */ \
953 ++inptr; \
955 else \
957 uint_fast32_t cnt; \
958 uint_fast32_t i; \
960 if (ch >= 0xc2 && ch < 0xe0) \
962 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
963 otherwise the wide character could have been represented \
964 using a single byte. */ \
965 cnt = 2; \
966 ch &= 0x1f; \
968 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
970 /* We expect three bytes. */ \
971 cnt = 3; \
972 ch &= 0x0f; \
974 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
976 /* We expect four bytes. */ \
977 cnt = 4; \
978 ch &= 0x07; \
980 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
982 /* We expect five bytes. */ \
983 cnt = 5; \
984 ch &= 0x03; \
986 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
988 /* We expect six bytes. */ \
989 cnt = 6; \
990 ch &= 0x01; \
992 else \
994 /* Search the end of this ill-formed UTF-8 character. This \
995 is the next byte with (x & 0xc0) != 0x80. */ \
996 i = 0; \
997 do \
998 ++i; \
999 while (inptr + i < inend \
1000 && (*(inptr + i) & 0xc0) == 0x80 \
1001 && i < 5); \
1003 errout: \
1004 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1007 if (__builtin_expect (inptr + cnt > inend, 0)) \
1009 /* We don't have enough input. But before we report that check \
1010 that all the bytes are correct. */ \
1011 for (i = 1; inptr + i < inend; ++i) \
1012 if ((inptr[i] & 0xc0) != 0x80) \
1013 break; \
1015 if (__builtin_expect (inptr + i == inend, 1)) \
1017 result = __GCONV_INCOMPLETE_INPUT; \
1018 break; \
1021 goto errout; \
1024 /* Read the possible remaining bytes. */ \
1025 for (i = 1; i < cnt; ++i) \
1027 uint32_t byte = inptr[i]; \
1029 if ((byte & 0xc0) != 0x80) \
1030 /* This is an illegal encoding. */ \
1031 break; \
1033 ch <<= 6; \
1034 ch |= byte & 0x3f; \
1037 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1038 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1039 have been represented with fewer than cnt bytes. */ \
1040 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1042 /* This is an illegal encoding. */ \
1043 goto errout; \
1046 inptr += cnt; \
1049 /* Now adjust the pointers and store the result. */ \
1050 *((uint32_t *) outptr) = ch; \
1051 outptr += sizeof (uint32_t); \
1053 #define LOOP_NEED_FLAGS
1055 #define STORE_REST \
1057 /* We store the remaining bytes while converting them into the UCS4 \
1058 format. We can assume that the first byte in the buffer is \
1059 correct and that it requires a larger number of bytes than there \
1060 are in the input buffer. */ \
1061 wint_t ch = **inptrp; \
1062 size_t cnt, r; \
1064 state->__count = inend - *inptrp; \
1066 if (ch >= 0xc2 && ch < 0xe0) \
1068 /* We expect two bytes. The first byte cannot be 0xc0 or \
1069 0xc1, otherwise the wide character could have been \
1070 represented using a single byte. */ \
1071 cnt = 2; \
1072 ch &= 0x1f; \
1074 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1076 /* We expect three bytes. */ \
1077 cnt = 3; \
1078 ch &= 0x0f; \
1080 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1082 /* We expect four bytes. */ \
1083 cnt = 4; \
1084 ch &= 0x07; \
1086 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1088 /* We expect five bytes. */ \
1089 cnt = 5; \
1090 ch &= 0x03; \
1092 else \
1094 /* We expect six bytes. */ \
1095 cnt = 6; \
1096 ch &= 0x01; \
1099 /* The first byte is already consumed. */ \
1100 r = cnt - 1; \
1101 while (++(*inptrp) < inend) \
1103 ch <<= 6; \
1104 ch |= **inptrp & 0x3f; \
1105 --r; \
1108 /* Shift for the so far missing bytes. */ \
1109 ch <<= r * 6; \
1111 /* Store the number of bytes expected for the entire sequence. */ \
1112 state->__count |= cnt << 8; \
1114 /* Store the value. */ \
1115 state->__value.__wch = ch; \
1118 #define UNPACK_BYTES \
1120 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1121 wint_t wch = state->__value.__wch; \
1122 size_t ntotal = state->__count >> 8; \
1124 inlen = state->__count & 255; \
1126 bytebuf[0] = inmask[ntotal - 2]; \
1128 do \
1130 if (--ntotal < inlen) \
1131 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1132 wch >>= 6; \
1134 while (ntotal > 1); \
1136 bytebuf[0] |= wch; \
1139 #define CLEAR_STATE \
1140 state->__count = 0
1143 #include <iconv/loop.c>
1144 #include <iconv/skeleton.c>
1147 /* Convert from UCS2 to the internal (UCS4-like) format. */
1148 #define DEFINE_INIT 0
1149 #define DEFINE_FINI 0
1150 #define MIN_NEEDED_FROM 2
1151 #define MIN_NEEDED_TO 4
1152 #define FROM_DIRECTION 1
1153 #define FROM_LOOP ucs2_internal_loop
1154 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1155 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1156 #define ONE_DIRECTION 1
1158 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1159 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1160 #define LOOPFCT FROM_LOOP
1161 #define BODY \
1163 uint16_t u1 = get16 (inptr); \
1165 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1167 /* Surrogate characters in UCS-2 input are not valid. Reject \
1168 them. (Catching this here is not security relevant.) */ \
1169 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1172 *((uint32_t *) outptr) = u1; \
1173 outptr += sizeof (uint32_t); \
1174 inptr += 2; \
1176 #define LOOP_NEED_FLAGS
1177 #include <iconv/loop.c>
1178 #include <iconv/skeleton.c>
1181 /* Convert from the internal (UCS4-like) format to UCS2. */
1182 #define DEFINE_INIT 0
1183 #define DEFINE_FINI 0
1184 #define MIN_NEEDED_FROM 4
1185 #define MIN_NEEDED_TO 2
1186 #define FROM_DIRECTION 1
1187 #define FROM_LOOP internal_ucs2_loop
1188 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1189 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1190 #define ONE_DIRECTION 1
1192 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1193 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1194 #define LOOPFCT FROM_LOOP
1195 #define BODY \
1197 uint32_t val = *((const uint32_t *) inptr); \
1199 if (__builtin_expect (val >= 0x10000, 0)) \
1201 UNICODE_TAG_HANDLER (val, 4); \
1202 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1204 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1206 /* Surrogate characters in UCS-4 input are not valid. \
1207 We must catch this, because the UCS-2 output might be \
1208 interpreted as UTF-16 by other programs. If we let \
1209 surrogates pass through, attackers could make a security \
1210 hole exploit by synthesizing any desired plane 1-16 \
1211 character. */ \
1212 result = __GCONV_ILLEGAL_INPUT; \
1213 if (! ignore_errors_p ()) \
1214 break; \
1215 inptr += 4; \
1216 ++*irreversible; \
1217 continue; \
1219 else \
1221 put16 (outptr, val); \
1222 outptr += sizeof (uint16_t); \
1223 inptr += 4; \
1226 #define LOOP_NEED_FLAGS
1227 #include <iconv/loop.c>
1228 #include <iconv/skeleton.c>
1231 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1232 #define DEFINE_INIT 0
1233 #define DEFINE_FINI 0
1234 #define MIN_NEEDED_FROM 2
1235 #define MIN_NEEDED_TO 4
1236 #define FROM_DIRECTION 1
1237 #define FROM_LOOP ucs2reverse_internal_loop
1238 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1239 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1240 #define ONE_DIRECTION 1
1242 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1243 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1244 #define LOOPFCT FROM_LOOP
1245 #define BODY \
1247 uint16_t u1 = bswap_16 (get16 (inptr)); \
1249 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1251 /* Surrogate characters in UCS-2 input are not valid. Reject \
1252 them. (Catching this here is not security relevant.) */ \
1253 if (! ignore_errors_p ()) \
1255 result = __GCONV_ILLEGAL_INPUT; \
1256 break; \
1258 inptr += 2; \
1259 ++*irreversible; \
1260 continue; \
1263 *((uint32_t *) outptr) = u1; \
1264 outptr += sizeof (uint32_t); \
1265 inptr += 2; \
1267 #define LOOP_NEED_FLAGS
1268 #include <iconv/loop.c>
1269 #include <iconv/skeleton.c>
1272 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1273 #define DEFINE_INIT 0
1274 #define DEFINE_FINI 0
1275 #define MIN_NEEDED_FROM 4
1276 #define MIN_NEEDED_TO 2
1277 #define FROM_DIRECTION 1
1278 #define FROM_LOOP internal_ucs2reverse_loop
1279 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1280 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1281 #define ONE_DIRECTION 1
1283 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1284 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1285 #define LOOPFCT FROM_LOOP
1286 #define BODY \
1288 uint32_t val = *((const uint32_t *) inptr); \
1289 if (__builtin_expect (val >= 0x10000, 0)) \
1291 UNICODE_TAG_HANDLER (val, 4); \
1292 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1294 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1296 /* Surrogate characters in UCS-4 input are not valid. \
1297 We must catch this, because the UCS-2 output might be \
1298 interpreted as UTF-16 by other programs. If we let \
1299 surrogates pass through, attackers could make a security \
1300 hole exploit by synthesizing any desired plane 1-16 \
1301 character. */ \
1302 if (! ignore_errors_p ()) \
1304 result = __GCONV_ILLEGAL_INPUT; \
1305 break; \
1307 inptr += 4; \
1308 ++*irreversible; \
1309 continue; \
1311 else \
1313 put16 (outptr, bswap_16 (val)); \
1314 outptr += sizeof (uint16_t); \
1315 inptr += 4; \
1318 #define LOOP_NEED_FLAGS
1319 #include <iconv/loop.c>
1320 #include <iconv/skeleton.c>