2.5-18.1
[glibc.git] / iconv / gconv_simple.c
blob343c27521f9f8a4c5cb3fa5033805087bcabd55b
1 /* Simple transformations functions.
2 Copyright (C) 1997-2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
31 #include <gconv_int.h>
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
42 #ifndef EILSEQ
43 # define EILSEQ EINVAL
44 #endif
/* Specialized conversion function for a single byte to INTERNAL, recognizing
   only ASCII characters.

   STEP is not used.  Returns C itself when it is below 0x80 and WEOF
   for every other byte value.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  if (c < 0x80)
    return c;
  else
    return WEOF;
}
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 standard
   says that unless some higher protocol specifies it differently, the
   byte order is big endian.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs4_loop
#define TO_LOOP			internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4
/* Convert complete 4-byte units from the internal (host byte order)
   format to big-endian UCS4.

   As many units as fit in both [*INPTRP, INEND) and [*OUTPTRP, OUTEND)
   are converted and both pointers are advanced past them.  The input is
   read through a uint32_t pointer.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than 4 output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;
  uint32_t *outptr32 = (uint32_t *) outptr;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    *outptr32++ = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = (unsigned char *) outptr32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that only performs byte-sized accesses,
   compiled when _STRING_ARCH_unaligned is not defined.

   Converts as many complete 4-byte units as fit in both buffers from
   host byte order to big-endian UCS4 and advances *INPTRP and *OUTPTRP.
   STEP, STEP_DATA and IRREVERSIBLE are not used.  The return value is
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT,
   chosen as in the status block at the end.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      /* Reverse the four bytes of the unit.  */
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one internal -> UCS4 unit whose first bytes are saved in the
   conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Further bytes are taken from
   *INPTRP until four are available or INEND is reached.  With fewer
   than four bytes the count is stored back and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the unit is written
   in big-endian order at *OUTPTRP, which is advanced by 4, the count
   bits are cleared and __GCONV_OK is returned.  STEP, OUTEND and
   IRREVERSIBLE are not used; in particular the output room is not
   checked here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

#elif __BYTE_ORDER == __BIG_ENDIAN
  /* XXX unaligned */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
213 #include <iconv/skeleton.c>
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs4_internal_loop
#define TO_LOOP			ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs4_internal
/* Convert big-endian UCS4 input to the internal (host byte order) format.

   Complete 4-byte units are read from *INPTRP (through a uint32_t
   pointer) while at least 4 bytes of input and 4 bytes of output room
   remain.  Values above 0x7fffffff are rejected:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and the caller's pointers are left untouched;
     - else if STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, the
       unit is skipped and *IRREVERSIBLE is incremented;
     - else *INPTRP/*OUTPTRP are set to the offending unit and the
       current output position and __GCONV_ILLEGAL_INPUT is returned.
   On normal termination both pointers are stored back and the result
   is __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Check the remaining room on every iteration rather than iterating a
     precomputed number of times: a unit skipped because of
     __GCONV_IGNORE_ERRORS consumes input without producing output, so a
     fixed count would stop while input and output room are both still
     available and the status below would be wrong.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop that only performs byte-sized accesses,
   compiled when _STRING_ARCH_unaligned is not defined.

   Converts big-endian UCS4 units to host byte order while at least 4
   bytes of input and 4 bytes of output room remain.  A unit whose most
   significant byte (inptr[0]) is above 0x7f encodes a value above
   0x7fffffff and is rejected: with IRREVERSIBLE == NULL the function
   returns __GCONV_ILLEGAL_INPUT without touching the caller's pointers;
   with __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags the unit is
   skipped and *IRREVERSIBLE incremented; otherwise the pointers are
   stored back and __GCONV_ILLEGAL_INPUT is returned.  STEP is not
   used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check the remaining room on every iteration: skipped units
     consume input but no output, so a precomputed iteration count would
     end the loop too early.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* The limit is 0x7f, not 0x80: a lead byte of 0x80 already means a
         value of at least 0x80000000, which the aligned loop rejects.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish one UCS4 -> internal unit whose first bytes are saved in the
   conversion state.

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already stored in __value.__wchb; more bytes are appended from
   *INPTRP until four are present or INEND is reached.  With fewer than
   four bytes the count is stored back and __GCONV_INCOMPLETE_INPUT is
   returned.  If the most significant byte (index 0) is above 0x7f the
   value exceeds 0x7fffffff: unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, *INPTRP is moved back by the bytes taken in this
   call and __GCONV_ILLEGAL_INPUT is returned; with the flag set the
   unit is dropped silently.  A valid unit is written in host byte order
   at *OUTPTRP, which is advanced by 4.  In the non-error cases the count
   bits are cleared and __GCONV_OK is returned.  STEP, OUTEND and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The limit is 0x7f, not 0x80: a lead byte of 0x80 already means a
     value of at least 0x80000000.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
425 #include <iconv/skeleton.c>
/* Similarly for the little endian form.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs4le_loop
#define TO_LOOP			internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4le
/* Convert complete 4-byte units from the internal (host byte order)
   format to little-endian UCS4.

   As many units as fit in both [*INPTRP, INEND) and [*OUTPTRP, OUTEND)
   are converted and both pointers are advanced past them.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than 4 output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;
  uint32_t *outptr32 = (uint32_t *) outptr;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
  outptr = (unsigned char *) outptr32;

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that only performs byte-sized
   accesses on big-endian hosts, compiled when _STRING_ARCH_unaligned is
   not defined.

   Converts as many complete 4-byte units as fit in both buffers from
   host byte order to little-endian UCS4 and advances *INPTRP and
   *OUTPTRP.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than 4 input bytes remain, and
   __GCONV_FULL_OUTPUT otherwise (asserting that the output really has
   fewer than 4 bytes of room).  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      /* Reverse the four bytes of the unit.  */
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
/* Finish one internal -> UCS4LE unit whose first bytes are saved in the
   conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Further bytes are taken from
   *INPTRP until four are available or INEND is reached.  With fewer
   than four bytes the count is stored back and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the unit is written
   in little-endian order at *OUTPTRP, which is advanced by 4, the count
   bits are cleared and __GCONV_OK is returned.  STEP, OUTEND and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

#else
  /* XXX unaligned */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];

#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
582 #include <iconv/skeleton.c>
/* And finally from UCS4-LE to the internal encoding.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs4le_internal_loop
#define TO_LOOP			ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs4le_internal
/* Convert little-endian UCS4 input to the internal (host byte order)
   format.

   Complete 4-byte units are read from *INPTRP (through a uint32_t
   pointer) while at least 4 bytes of input and 4 bytes of output room
   remain.  Values above 0x7fffffff are rejected:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and the caller's pointers are left untouched;
     - else if STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, the
       unit is skipped and *IRREVERSIBLE is incremented;
     - else *INPTRP/*OUTPTRP are set to the offending unit and the
       current output position and __GCONV_ILLEGAL_INPUT is returned.
   On normal termination both pointers are stored back and the result
   is __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT (fewer than 4 input
   bytes left) or __GCONV_FULL_OUTPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Check the remaining room on every iteration rather than iterating a
     precomputed number of times: a unit skipped because of
     __GCONV_IGNORE_ERRORS consumes input without producing output, so a
     fixed count would stop while both input and output room remain and
     the assertion in the status block below would fail.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got so the units converted so far are not
             lost and the error position is the offending unit.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop that only performs byte-sized
   accesses, compiled when _STRING_ARCH_unaligned is not defined.

   Converts little-endian UCS4 units to host byte order while at least 4
   bytes of input and 4 bytes of output room remain.  A unit whose most
   significant byte (inptr[3]) is above 0x7f encodes a value above
   0x7fffffff and is rejected: with IRREVERSIBLE == NULL the function
   returns __GCONV_ILLEGAL_INPUT without touching the caller's pointers;
   with __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags the unit is
   skipped and *IRREVERSIBLE incremented; otherwise the pointers are
   stored back and __GCONV_ILLEGAL_INPUT is returned.  STEP is not
   used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check the remaining room on every iteration: skipped units
     consume input but no output, so a precomputed iteration count would
     end the loop too early and trip the assertion below.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* The limit is 0x7f, not 0x80: a top byte of 0x80 already means a
         value of at least 0x80000000, which the aligned loop rejects.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
/* Finish one UCS4LE -> internal unit whose first bytes are saved in the
   conversion state.

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already stored in __value.__wchb; more bytes are appended from
   *INPTRP until four are present or INEND is reached.  With fewer than
   four bytes the count is stored back and __GCONV_INCOMPLETE_INPUT is
   returned.  If the most significant byte (index 3) is above 0x7f the
   value exceeds 0x7fffffff: unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, *INPTRP is moved back by the bytes taken in this
   call and __GCONV_ILLEGAL_INPUT is returned; with the flag set the
   unit is dropped silently.  A valid unit is written in host byte order
   at *OUTPTRP, which is advanced by 4.  In the non-error cases the count
   bits are cleared and __GCONV_OK is returned.  STEP, OUTEND and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The limit is 0x7f, not 0x80: a top byte of 0x80 already means a
     value of at least 0x80000000.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call, as
             ucs4_internal_loop_single does.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
795 #include <iconv/skeleton.c>
798 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
799 #define DEFINE_INIT 0
800 #define DEFINE_FINI 0
801 #define MIN_NEEDED_FROM 1
802 #define MIN_NEEDED_TO 4
803 #define FROM_DIRECTION 1
804 #define FROM_LOOP ascii_internal_loop
805 #define TO_LOOP ascii_internal_loop /* This is not used. */
806 #define FUNCTION_NAME __gconv_transform_ascii_internal
807 #define ONE_DIRECTION 1
809 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
810 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
811 #define LOOPFCT FROM_LOOP
812 #define BODY \
814 if (__builtin_expect (*inptr > '\x7f', 0)) \
816 /* The value is too large. We don't try transliteration here since \
817 this is not an error because of the lack of possibilities to \
818 represent the result. This is a genuine bug in the input since \
819 ASCII does not allow such values. */ \
820 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
822 else \
823 /* It's an one byte sequence. */ \
824 *((uint32_t *) outptr) = *inptr++; \
825 outptr += sizeof (uint32_t); \
827 #define LOOP_NEED_FLAGS
828 #include <iconv/loop.c>
829 #include <iconv/skeleton.c>
832 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
833 #define DEFINE_INIT 0
834 #define DEFINE_FINI 0
835 #define MIN_NEEDED_FROM 4
836 #define MIN_NEEDED_TO 1
837 #define FROM_DIRECTION 1
838 #define FROM_LOOP internal_ascii_loop
839 #define TO_LOOP internal_ascii_loop /* This is not used. */
840 #define FUNCTION_NAME __gconv_transform_internal_ascii
841 #define ONE_DIRECTION 1
843 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
844 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
845 #define LOOPFCT FROM_LOOP
846 #define BODY \
848 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
850 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
851 STANDARD_TO_LOOP_ERR_HANDLER (4); \
853 else \
854 /* It's an one byte sequence. */ \
855 *outptr++ = *((const uint32_t *) inptr); \
856 inptr += sizeof (uint32_t); \
858 #define LOOP_NEED_FLAGS
859 #include <iconv/loop.c>
860 #include <iconv/skeleton.c>
863 /* Convert from the internal (UCS4-like) format to UTF-8. */
864 #define DEFINE_INIT 0
865 #define DEFINE_FINI 0
866 #define MIN_NEEDED_FROM 4
867 #define MIN_NEEDED_TO 1
868 #define MAX_NEEDED_TO 6
869 #define FROM_DIRECTION 1
870 #define FROM_LOOP internal_utf8_loop
871 #define TO_LOOP internal_utf8_loop /* This is not used. */
872 #define FUNCTION_NAME __gconv_transform_internal_utf8
873 #define ONE_DIRECTION 1
875 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
876 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
877 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
878 #define LOOPFCT FROM_LOOP
879 #define BODY \
881 uint32_t wc = *((const uint32_t *) inptr); \
883 if (wc < 0x80) \
884 /* It's an one byte sequence. */ \
885 *outptr++ = (unsigned char) wc; \
886 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
888 size_t step; \
889 unsigned char *start; \
891 for (step = 2; step < 6; ++step) \
892 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
893 break; \
895 if (__builtin_expect (outptr + step > outend, 0)) \
897 /* Too long. */ \
898 result = __GCONV_FULL_OUTPUT; \
899 break; \
902 start = outptr; \
903 *outptr = (unsigned char) (~0xff >> step); \
904 outptr += step; \
905 do \
907 start[--step] = 0x80 | (wc & 0x3f); \
908 wc >>= 6; \
910 while (step > 1); \
911 start[0] |= wc; \
913 else \
915 STANDARD_TO_LOOP_ERR_HANDLER (4); \
918 inptr += 4; \
920 #define LOOP_NEED_FLAGS
921 #include <iconv/loop.c>
922 #include <iconv/skeleton.c>
925 /* Convert from UTF-8 to the internal (UCS4-like) format. */
926 #define DEFINE_INIT 0
927 #define DEFINE_FINI 0
928 #define MIN_NEEDED_FROM 1
929 #define MAX_NEEDED_FROM 6
930 #define MIN_NEEDED_TO 4
931 #define FROM_DIRECTION 1
932 #define FROM_LOOP utf8_internal_loop
933 #define TO_LOOP utf8_internal_loop /* This is not used. */
934 #define FUNCTION_NAME __gconv_transform_utf8_internal
935 #define ONE_DIRECTION 1
937 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
938 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
939 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
940 #define LOOPFCT FROM_LOOP
941 #define BODY \
943 uint32_t ch; \
944 uint_fast32_t cnt; \
945 uint_fast32_t i; \
947 /* Next input byte. */ \
948 ch = *inptr; \
950 if (ch < 0x80) \
952 /* One byte sequence. */ \
953 cnt = 1; \
954 ++inptr; \
956 else \
958 if (ch >= 0xc2 && ch < 0xe0) \
960 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
961 otherwise the wide character could have been represented \
962 using a single byte. */ \
963 cnt = 2; \
964 ch &= 0x1f; \
966 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
968 /* We expect three bytes. */ \
969 cnt = 3; \
970 ch &= 0x0f; \
972 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
974 /* We expect four bytes. */ \
975 cnt = 4; \
976 ch &= 0x07; \
978 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
980 /* We expect five bytes. */ \
981 cnt = 5; \
982 ch &= 0x03; \
984 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
986 /* We expect six bytes. */ \
987 cnt = 6; \
988 ch &= 0x01; \
990 else \
992 /* Search the end of this ill-formed UTF-8 character. This \
993 is the next byte with (x & 0xc0) != 0x80. */ \
994 i = 0; \
995 do \
996 ++i; \
997 while (inptr + i < inend \
998 && (*(inptr + i) & 0xc0) == 0x80 \
999 && i < 5); \
1001 errout: \
1002 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1005 if (__builtin_expect (inptr + cnt > inend, 0)) \
1007 /* We don't have enough input. But before we report that check \
1008 that all the bytes are correct. */ \
1009 for (i = 1; inptr + i < inend; ++i) \
1010 if ((inptr[i] & 0xc0) != 0x80) \
1011 break; \
1013 if (__builtin_expect (inptr + i == inend, 1)) \
1015 result = __GCONV_INCOMPLETE_INPUT; \
1016 break; \
1019 goto errout; \
1022 /* Read the possible remaining bytes. */ \
1023 for (i = 1; i < cnt; ++i) \
1025 uint32_t byte = inptr[i]; \
1027 if ((byte & 0xc0) != 0x80) \
1028 /* This is an illegal encoding. */ \
1029 break; \
1031 ch <<= 6; \
1032 ch |= byte & 0x3f; \
1035 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1036 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1037 have been represented with fewer than cnt bytes. */ \
1038 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1040 /* This is an illegal encoding. */ \
1041 goto errout; \
1044 inptr += cnt; \
1047 /* Now adjust the pointers and store the result. */ \
1048 *((uint32_t *) outptr) = ch; \
1049 outptr += sizeof (uint32_t); \
1051 #define LOOP_NEED_FLAGS
1053 #define STORE_REST \
1055 /* We store the remaining bytes while converting them into the UCS4 \
1056 format. We can assume that the first byte in the buffer is \
1057 correct and that it requires a larger number of bytes than there \
1058 are in the input buffer. */ \
1059 wint_t ch = **inptrp; \
1060 size_t cnt, r; \
1062 state->__count = inend - *inptrp; \
1064 if (ch >= 0xc2 && ch < 0xe0) \
1066 /* We expect two bytes. The first byte cannot be 0xc0 or \
1067 0xc1, otherwise the wide character could have been \
1068 represented using a single byte. */ \
1069 cnt = 2; \
1070 ch &= 0x1f; \
1072 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1074 /* We expect three bytes. */ \
1075 cnt = 3; \
1076 ch &= 0x0f; \
1078 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1080 /* We expect four bytes. */ \
1081 cnt = 4; \
1082 ch &= 0x07; \
1084 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1086 /* We expect five bytes. */ \
1087 cnt = 5; \
1088 ch &= 0x03; \
1090 else \
1092 /* We expect six bytes. */ \
1093 cnt = 6; \
1094 ch &= 0x01; \
1097 /* The first byte is already consumed. */ \
1098 r = cnt - 1; \
1099 while (++(*inptrp) < inend) \
1101 ch <<= 6; \
1102 ch |= **inptrp & 0x3f; \
1103 --r; \
1106 /* Shift for the so far missing bytes. */ \
1107 ch <<= r * 6; \
1109 /* Store the number of bytes expected for the entire sequence. */ \
1110 state->__count |= cnt << 8; \
1112 /* Store the value. */ \
1113 state->__value.__wch = ch; \
1116 #define UNPACK_BYTES \
1118 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1119 wint_t wch = state->__value.__wch; \
1120 size_t ntotal = state->__count >> 8; \
1122 inlen = state->__count & 255; \
1124 bytebuf[0] = inmask[ntotal - 2]; \
1126 do \
1128 if (--ntotal < inlen) \
1129 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1130 wch >>= 6; \
1132 while (ntotal > 1); \
1134 bytebuf[0] |= wch; \
1137 #define CLEAR_STATE \
1138 state->__count = 0
1141 #include <iconv/loop.c>
1142 #include <iconv/skeleton.c>
1145 /* Convert from UCS2 to the internal (UCS4-like) format. */
1146 #define DEFINE_INIT 0
1147 #define DEFINE_FINI 0
1148 #define MIN_NEEDED_FROM 2
1149 #define MIN_NEEDED_TO 4
1150 #define FROM_DIRECTION 1
1151 #define FROM_LOOP ucs2_internal_loop
1152 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1153 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1154 #define ONE_DIRECTION 1
1156 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1157 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1158 #define LOOPFCT FROM_LOOP
1159 #define BODY \
1161 uint16_t u1 = get16 (inptr); \
1163 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1165 /* Surrogate characters in UCS-2 input are not valid. Reject \
1166 them. (Catching this here is not security relevant.) */ \
1167 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1170 *((uint32_t *) outptr) = u1; \
1171 outptr += sizeof (uint32_t); \
1172 inptr += 2; \
1174 #define LOOP_NEED_FLAGS
1175 #include <iconv/loop.c>
1176 #include <iconv/skeleton.c>
1179 /* Convert from the internal (UCS4-like) format to UCS2. */
1180 #define DEFINE_INIT 0
1181 #define DEFINE_FINI 0
1182 #define MIN_NEEDED_FROM 4
1183 #define MIN_NEEDED_TO 2
1184 #define FROM_DIRECTION 1
1185 #define FROM_LOOP internal_ucs2_loop
1186 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1187 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1188 #define ONE_DIRECTION 1
1190 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1191 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1192 #define LOOPFCT FROM_LOOP
1193 #define BODY \
1195 uint32_t val = *((const uint32_t *) inptr); \
1197 if (__builtin_expect (val >= 0x10000, 0)) \
1199 UNICODE_TAG_HANDLER (val, 4); \
1200 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1202 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1204 /* Surrogate characters in UCS-4 input are not valid. \
1205 We must catch this, because the UCS-2 output might be \
1206 interpreted as UTF-16 by other programs. If we let \
1207 surrogates pass through, attackers could make a security \
1208 hole exploit by synthesizing any desired plane 1-16 \
1209 character. */ \
1210 result = __GCONV_ILLEGAL_INPUT; \
1211 if (! ignore_errors_p ()) \
1212 break; \
1213 inptr += 4; \
1214 ++*irreversible; \
1215 continue; \
1217 else \
1219 put16 (outptr, val); \
1220 outptr += sizeof (uint16_t); \
1221 inptr += 4; \
1224 #define LOOP_NEED_FLAGS
1225 #include <iconv/loop.c>
1226 #include <iconv/skeleton.c>
1229 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1230 #define DEFINE_INIT 0
1231 #define DEFINE_FINI 0
1232 #define MIN_NEEDED_FROM 2
1233 #define MIN_NEEDED_TO 4
1234 #define FROM_DIRECTION 1
1235 #define FROM_LOOP ucs2reverse_internal_loop
1236 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1237 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1238 #define ONE_DIRECTION 1
1240 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1241 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1242 #define LOOPFCT FROM_LOOP
1243 #define BODY \
1245 uint16_t u1 = bswap_16 (get16 (inptr)); \
1247 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1249 /* Surrogate characters in UCS-2 input are not valid. Reject \
1250 them. (Catching this here is not security relevant.) */ \
1251 if (! ignore_errors_p ()) \
1253 result = __GCONV_ILLEGAL_INPUT; \
1254 break; \
1256 inptr += 2; \
1257 ++*irreversible; \
1258 continue; \
1261 *((uint32_t *) outptr) = u1; \
1262 outptr += sizeof (uint32_t); \
1263 inptr += 2; \
1265 #define LOOP_NEED_FLAGS
1266 #include <iconv/loop.c>
1267 #include <iconv/skeleton.c>
1270 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1271 #define DEFINE_INIT 0
1272 #define DEFINE_FINI 0
1273 #define MIN_NEEDED_FROM 4
1274 #define MIN_NEEDED_TO 2
1275 #define FROM_DIRECTION 1
1276 #define FROM_LOOP internal_ucs2reverse_loop
1277 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1278 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1279 #define ONE_DIRECTION 1
1281 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1282 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1283 #define LOOPFCT FROM_LOOP
1284 #define BODY \
1286 uint32_t val = *((const uint32_t *) inptr); \
1287 if (__builtin_expect (val >= 0x10000, 0)) \
1289 UNICODE_TAG_HANDLER (val, 4); \
1290 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1292 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1294 /* Surrogate characters in UCS-4 input are not valid. \
1295 We must catch this, because the UCS-2 output might be \
1296 interpreted as UTF-16 by other programs. If we let \
1297 surrogates pass through, attackers could make a security \
1298 hole exploit by synthesizing any desired plane 1-16 \
1299 character. */ \
1300 if (! ignore_errors_p ()) \
1302 result = __GCONV_ILLEGAL_INPUT; \
1303 break; \
1305 inptr += 4; \
1306 ++*irreversible; \
1307 continue; \
1309 else \
1311 put16 (outptr, bswap_16 (val)); \
1312 outptr += sizeof (uint16_t); \
1313 inptr += 4; \
1316 #define LOOP_NEED_FLAGS
1317 #include <iconv/loop.c>
1318 #include <iconv/skeleton.c>