Update autoconf version requirement in install.texi.
[glibc.git] / iconv / gconv_simple.c
blob4ed45052e4d2d100a1e83e522578ca2475553049
1 /* Simple transformations functions.
2 Copyright (C) 1997-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 #include <byteswap.h>
21 #include <dlfcn.h>
22 #include <endian.h>
23 #include <errno.h>
24 #include <gconv.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <sys/param.h>
30 #include <gconv_int.h>
/* With these two definitions in effect, including gconv_builtin.h turns
   every BUILTIN_TRANSFORMATION entry into an extern prototype for its
   Fct argument, while BUILTIN_ALIAS entries expand to nothing.
   NOTE(review): presumably gconv_builtin.h consists only of such
   entries -- confirm against that header.  */
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 const unsigned char **, const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
/* Fall back to EINVAL when EILSEQ is not defined at this point.  */
41 #ifndef EILSEQ
42 # define EILSEQ EINVAL
43 #endif
/* Single-byte to INTERNAL conversion that recognizes only ASCII: a byte
   C below 0x80 is returned unchanged as a wide character, any other
   byte yields WEOF.  STEP is not consulted.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
58 /* Transform from the internal, UCS4-like format, to UCS4. The
59 difference between the internal UCS4 format and the real UCS4
60 format is, if any, the endianness. The Unicode/ISO 10646 standard
61 says that, unless some higher protocol specifies it differently, the
62 byte order is big endian. */
/* Parameters for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Characters are four bytes wide on both
   sides (MIN_NEEDED_FROM / MIN_NEEDED_TO); TO_LOOP just repeats
   FROM_LOOP because, as noted on its line, it is not used.
   NOTE(review): the precise meaning of each parameter is defined by
   skeleton.c, which is not visible here.  */
63 #define DEFINE_INIT 0
64 #define DEFINE_FINI 0
65 #define MIN_NEEDED_FROM 4
66 #define MIN_NEEDED_TO 4
67 #define FROM_DIRECTION 1
68 #define FROM_LOOP internal_ucs4_loop
69 #define TO_LOOP internal_ucs4_loop /* This is not used. */
70 #define FUNCTION_NAME __gconv_transform_internal_ucs4
71 #define ONE_DIRECTION 0
/* Convert as many complete 4-byte characters as both buffers can hold
   from the internal (host byte order) form to big-endian UCS4.
   *INPTRP and *OUTPTRP are advanced past the converted data.  Returns
   __GCONV_EMPTY_INPUT when all input was used, __GCONV_FULL_OUTPUT when
   fewer than four output bytes are left, and __GCONV_INCOMPLETE_INPUT
   otherwise.  STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order differs from UCS4 order: swap every word.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = nchars;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Same byte order on both sides: a plain copy is enough.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#if !_STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4_loop: it performs the same
   internal -> big-endian UCS4 conversion and reports the same status
   values, but never accesses the buffers through a uint32_t pointer.
   STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const stop = src + nchars * 4;

  for (; src != stop; src += 4, dst += 4)
    {
      int i;

      for (i = 0; i < 4; ++i)
        dst[i] = src[3 - i];
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Same byte order on both sides: a plain copy is enough.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
# error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
/* Finish a character that was split across input buffers.  The bytes
   collected so far live in STEP_DATA->__statep (their number is kept in
   the low three bits of __count); more bytes are taken from *INPTRP.
   If four bytes are still not available the new count is stored and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written to *OUTPTRP in big-endian order, *OUTPTRP is advanced by
   four, the count is cleared and __GCONV_OK is returned.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  int i;

  /* Top up the state buffer from the input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes.  Remember how many we have.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  for (i = 0; i < 4; ++i)
    (*outptrp)[i] = state->__value.__wchb[3 - i];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* XXX unaligned */
  for (i = 0; i < 4; ++i)
    (*outptrp)[i] = state->__value.__wchb[i];
#else
# error "This endianess is not supported."
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
213 #include <iconv/skeleton.c>
216 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
217 for the other direction we have to check for correct values here. */
/* Parameters for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Characters are four bytes wide on both
   sides (MIN_NEEDED_FROM / MIN_NEEDED_TO); TO_LOOP just repeats
   FROM_LOOP because, as noted on its line, it is not used.
   NOTE(review): the precise meaning of each parameter is defined by
   skeleton.c, which is not visible here.  */
218 #define DEFINE_INIT 0
219 #define DEFINE_FINI 0
220 #define MIN_NEEDED_FROM 4
221 #define MIN_NEEDED_TO 4
222 #define FROM_DIRECTION 1
223 #define FROM_LOOP ucs4_internal_loop
224 #define TO_LOOP ucs4_internal_loop /* This is not used. */
225 #define FUNCTION_NAME __gconv_transform_ucs4_internal
226 #define ONE_DIRECTION 0
/* Convert big-endian UCS4 input to the internal (host byte order)
   form, one 4-byte character at a time, for as long as both buffers
   have room for a complete character.

   Values above 0x7fffffff are not valid UCS4:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the offending character
       and the current output position, and __GCONV_ILLEGAL_INPUT is
       returned.

   On normal completion *INPTRP and *OUTPTRP are advanced and the
   result is __GCONV_EMPTY_INPUT (input used up), __GCONV_FULL_OUTPUT
   (fewer than four output bytes left) or __GCONV_INCOMPLETE_INPUT
   (one to three trailing input bytes).  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Check the remaining room on every iteration instead of using a
     precomputed character count: a skipped character consumes input
     without producing output, so a fixed count would end the loop
     early and the status computed below would be wrong.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#if !_STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4_internal_loop: converts big-endian
   UCS4 to the internal (host byte order) form without accessing the
   buffers through a uint32_t pointer.

   A character is out of range when its most significant byte (the
   first input byte) is above 0x7f, i.e. the value exceeds 0x7fffffff:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the offending character
       and the current output position, and __GCONV_ILLEGAL_INPUT is
       returned.

   On normal completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT, as for the aligned
   variant.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Check the remaining room on every iteration: a skipped character
     consumes input without producing output, so a precomputed count
     would end the loop too early.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* 0x7f, not 0x80: 0x80xxxxxx is already above 0x7fffffff.  */
      if (__glibc_unlikely (inptr[0] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish a big-endian UCS4 character that was split across input
   buffers.  Bytes already collected live in STEP_DATA->__statep (their
   number is kept in the low three bits of __count); more bytes are
   taken from *INPTRP.

   Returns __GCONV_INCOMPLETE_INPUT, after storing the new byte count,
   if four bytes are still not available.  If the completed character
   is out of range (most significant byte above 0x7f) then, unless
   STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, *INPTRP is moved
   back over the bytes consumed by this call and __GCONV_ILLEGAL_INPUT
   is returned; with the flag set the character is dropped.  Otherwise
   the character is stored at *OUTPTRP in host byte order and *OUTPTRP
   is advanced by four.  In the last two cases the byte count is
   cleared and __GCONV_OK is returned.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x7f, not 0x80: 0x80xxxxxx is already above 0x7fffffff.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[0] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input by this call; the
             stored count still describes the state before it.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
426 #include <iconv/skeleton.c>
429 /* Similarly for the little endian form. */
/* Parameters for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Characters are four bytes wide on both
   sides (MIN_NEEDED_FROM / MIN_NEEDED_TO); TO_LOOP just repeats
   FROM_LOOP because, as noted on its line, it is not used.
   NOTE(review): the precise meaning of each parameter is defined by
   skeleton.c, which is not visible here.  */
430 #define DEFINE_INIT 0
431 #define DEFINE_FINI 0
432 #define MIN_NEEDED_FROM 4
433 #define MIN_NEEDED_TO 4
434 #define FROM_DIRECTION 1
435 #define FROM_LOOP internal_ucs4le_loop
436 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
437 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
438 #define ONE_DIRECTION 0
/* Convert as many complete 4-byte characters as both buffers can hold
   from the internal (host byte order) form to little-endian UCS4.
   *INPTRP and *OUTPTRP are advanced past the converted data.  Returns
   __GCONV_EMPTY_INPUT when all input was used, __GCONV_FULL_OUTPUT when
   fewer than four output bytes are left, and __GCONV_INCOMPLETE_INPUT
   otherwise.  STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order differs from the target order: swap every word.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = nchars;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Same byte order on both sides: a plain copy is enough.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#if !_STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4le_loop: it performs the same
   internal -> little-endian UCS4 conversion without accessing the
   buffers through a uint32_t pointer.  Returns __GCONV_EMPTY_INPUT when
   all input was used, __GCONV_INCOMPLETE_INPUT when fewer than four
   input bytes remain, and __GCONV_FULL_OUTPUT otherwise (asserting that
   the output really is full).  STEP, STEP_DATA and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const stop = src + nchars * 4;

  for (; src != stop; src += 4, dst += 4)
    {
      int i;

      for (i = 0; i < 4; ++i)
        dst[i] = src[3 - i];
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Same byte order on both sides: a plain copy is enough.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
# error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
/* Finish a character that was split across input buffers.  The bytes
   collected so far live in STEP_DATA->__statep (their number is kept in
   the low three bits of __count); more bytes are taken from *INPTRP.
   If four bytes are still not available the new count is stored and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written to *OUTPTRP in little-endian order, *OUTPTRP is advanced by
   four, the count is cleared and __GCONV_OK is returned.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  int i;

  /* Top up the state buffer from the input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes.  Remember how many we have.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  for (i = 0; i < 4; ++i)
    (*outptrp)[i] = state->__value.__wchb[3 - i];
#else
  /* XXX unaligned */
  for (i = 0; i < 4; ++i)
    (*outptrp)[i] = state->__value.__wchb[i];
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
584 #include <iconv/skeleton.c>
587 /* And finally from UCS4-LE to the internal encoding. */
/* Parameters for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Characters are four bytes wide on both
   sides (MIN_NEEDED_FROM / MIN_NEEDED_TO); TO_LOOP just repeats
   FROM_LOOP because, as noted on its line, it is not used.
   NOTE(review): the precise meaning of each parameter is defined by
   skeleton.c, which is not visible here.  */
588 #define DEFINE_INIT 0
589 #define DEFINE_FINI 0
590 #define MIN_NEEDED_FROM 4
591 #define MIN_NEEDED_TO 4
592 #define FROM_DIRECTION 1
593 #define FROM_LOOP ucs4le_internal_loop
594 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
595 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
596 #define ONE_DIRECTION 0
/* Convert little-endian UCS4 input to the internal (host byte order)
   form, one 4-byte character at a time, for as long as both buffers
   have room for a complete character.

   Values above 0x7fffffff are not valid UCS4:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the offending character
       and the current output position, and __GCONV_ILLEGAL_INPUT is
       returned.

   On normal completion *INPTRP and *OUTPTRP are advanced and the
   result is __GCONV_EMPTY_INPUT (input used up),
   __GCONV_INCOMPLETE_INPUT (one to three trailing input bytes) or
   __GCONV_FULL_OUTPUT (fewer than four output bytes left).  STEP is
   not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Check the remaining room on every iteration instead of using a
     precomputed character count: a skipped character consumes input
     without producing output, so a fixed count would end the loop
     early and the assertion in the status computation could fail.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got, as the big-endian variant does.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#if !_STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4le_internal_loop: converts little-endian
   UCS4 to the internal (host byte order) form without accessing the
   buffers through a uint32_t pointer.

   A character is out of range when its most significant byte (the
   fourth input byte) is above 0x7f, i.e. the value exceeds 0x7fffffff:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the offending character
       and the current output position, and __GCONV_ILLEGAL_INPUT is
       returned.

   On normal completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_INCOMPLETE_INPUT or __GCONV_FULL_OUTPUT, as for the aligned
   variant.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Check the remaining room on every iteration: a skipped character
     consumes input without producing output, so a precomputed count
     would end the loop too early and the assertion below could fail.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* 0x7f, not 0x80: 0x80xxxxxx is already above 0x7fffffff.  */
      if (__glibc_unlikely (inptr[3] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
/* Finish a little-endian UCS4 character that was split across input
   buffers.  Bytes already collected live in STEP_DATA->__statep (their
   number is kept in the low three bits of __count); more bytes are
   taken from *INPTRP.

   Returns __GCONV_INCOMPLETE_INPUT, after storing the new byte count,
   if four bytes are still not available.  If the completed character
   is out of range (most significant byte, i.e. the fourth one, above
   0x7f) then, unless STEP_DATA->__flags contains
   __GCONV_IGNORE_ERRORS, *INPTRP is moved back over the bytes consumed
   by this call and __GCONV_ILLEGAL_INPUT is returned; with the flag
   set the character is dropped.  Otherwise the character is stored at
   *OUTPTRP in host byte order and *OUTPTRP is advanced by four.  In
   the last two cases the byte count is cleared and __GCONV_OK is
   returned.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x7f, not 0x80: 0x80xxxxxx is already above 0x7fffffff.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[3] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input by this call, as
             the big-endian variant does; the stored count still
             describes the state before it.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
798 #include <iconv/skeleton.c>
801 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
802 #define DEFINE_INIT 0
803 #define DEFINE_FINI 0
804 #define MIN_NEEDED_FROM 1
805 #define MIN_NEEDED_TO 4
806 #define FROM_DIRECTION 1
807 #define FROM_LOOP ascii_internal_loop
808 #define TO_LOOP ascii_internal_loop /* This is not used. */
809 #define FUNCTION_NAME __gconv_transform_ascii_internal
810 #define ONE_DIRECTION 1
812 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
813 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
814 #define LOOPFCT FROM_LOOP
/* Per-character step, expanded inside <iconv/loop.c>: an input byte
   above 0x7f is handed to STANDARD_FROM_LOOP_ERR_HANDLER with a length
   of 1; any other byte is stored as a 32-bit word at outptr, advancing
   inptr by one byte and outptr by four.
   NOTE(review): inptr, outptr and the error-handler macro come from
   loop.c, which is not visible here.  */
815 #define BODY \
817 if (__glibc_unlikely (*inptr > '\x7f')) \
819 /* The value is too large. We don't try transliteration here since \
820 this is not an error because of the lack of possibilities to \
821 represent the result. This is a genuine bug in the input since \
822 ASCII does not allow such values. */ \
823 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
825 else \
827 /* It's an one byte sequence. */ \
828 *((uint32_t *) outptr) = *inptr++; \
829 outptr += sizeof (uint32_t); \
832 #define LOOP_NEED_FLAGS
833 #include <iconv/loop.c>
834 #include <iconv/skeleton.c>
837 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
838 #define DEFINE_INIT 0
839 #define DEFINE_FINI 0
840 #define MIN_NEEDED_FROM 4
841 #define MIN_NEEDED_TO 1
842 #define FROM_DIRECTION 1
843 #define FROM_LOOP internal_ascii_loop
844 #define TO_LOOP internal_ascii_loop /* This is not used. */
845 #define FUNCTION_NAME __gconv_transform_internal_ascii
846 #define ONE_DIRECTION 1
848 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
849 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
850 #define LOOPFCT FROM_LOOP
/* Per-character step, expanded inside <iconv/loop.c>: a 32-bit input
   word above 0x7f cannot be written as ASCII, so it is passed first to
   UNICODE_TAG_HANDLER and then to STANDARD_TO_LOOP_ERR_HANDLER, both
   with a length of 4; any other word is stored as a single output byte
   and inptr advances by four.
   NOTE(review): what the two handler macros do is defined outside this
   file's visible part -- confirm there.  */
851 #define BODY \
853 if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
855 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
856 STANDARD_TO_LOOP_ERR_HANDLER (4); \
858 else \
860 /* It's an one byte sequence. */ \
861 *outptr++ = *((const uint32_t *) inptr); \
862 inptr += sizeof (uint32_t); \
865 #define LOOP_NEED_FLAGS
866 #include <iconv/loop.c>
867 #include <iconv/skeleton.c>
870 /* Convert from the internal (UCS4-like) format to UTF-8. */
871 #define DEFINE_INIT 0
872 #define DEFINE_FINI 0
873 #define MIN_NEEDED_FROM 4
874 #define MIN_NEEDED_TO 1
875 #define MAX_NEEDED_TO 6
876 #define FROM_DIRECTION 1
877 #define FROM_LOOP internal_utf8_loop
878 #define TO_LOOP internal_utf8_loop /* This is not used. */
879 #define FUNCTION_NAME __gconv_transform_internal_utf8
880 #define ONE_DIRECTION 1
882 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
883 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
884 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
885 #define LOOPFCT FROM_LOOP
886 #define BODY \
888 uint32_t wc = *((const uint32_t *) inptr); \
890 if (__glibc_likely (wc < 0x80)) \
891 /* It's an one byte sequence. */ \
892 *outptr++ = (unsigned char) wc; \
893 else if (__glibc_likely (wc <= 0x7fffffff)) \
895 size_t step; \
896 unsigned char *start; \
898 for (step = 2; step < 6; ++step) \
899 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
900 break; \
902 if (__glibc_unlikely (outptr + step > outend)) \
904 /* Too long. */ \
905 result = __GCONV_FULL_OUTPUT; \
906 break; \
909 start = outptr; \
910 *outptr = (unsigned char) (~0xff >> step); \
911 outptr += step; \
912 do \
914 start[--step] = 0x80 | (wc & 0x3f); \
915 wc >>= 6; \
917 while (step > 1); \
918 start[0] |= wc; \
920 else \
922 STANDARD_TO_LOOP_ERR_HANDLER (4); \
925 inptr += 4; \
927 #define LOOP_NEED_FLAGS
928 #include <iconv/loop.c>
929 #include <iconv/skeleton.c>
932 /* Convert from UTF-8 to the internal (UCS4-like) format. */
933 #define DEFINE_INIT 0
934 #define DEFINE_FINI 0
935 #define MIN_NEEDED_FROM 1
936 #define MAX_NEEDED_FROM 6
937 #define MIN_NEEDED_TO 4
938 #define FROM_DIRECTION 1
939 #define FROM_LOOP utf8_internal_loop
940 #define TO_LOOP utf8_internal_loop /* This is not used. */
941 #define FUNCTION_NAME __gconv_transform_utf8_internal
942 #define ONE_DIRECTION 1
944 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
945 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
946 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
947 #define LOOPFCT FROM_LOOP
/* Per-character step, expanded inside <iconv/loop.c>: decode one UTF-8
   sequence starting at inptr and store it as a 32-bit word at outptr.

   A byte below 0x80 is a complete character.  Otherwise the lead byte
   selects the sequence length cnt: 0xc2..0xdf -> 2, 0xe0..0xef -> 3,
   0xf0..0xf7 -> 4, 0xf8..0xfb -> 5, 0xfc..0xfd -> 6.  Any other lead
   byte is ill-formed; the bytes to skip are then the lead byte plus up
   to four following continuation bytes (those with the top two bits
   10).

   If the sequence extends past inend, result is set to
   __GCONV_INCOMPLETE_INPUT provided every byte that is present after
   the lead byte is a continuation byte; otherwise it is an error.  A
   complete sequence is an error if a trailing byte is not a
   continuation byte, if (for cnt > 2) the value would have fit into a
   shorter sequence, or if the value is a UTF-16 surrogate
   (0xd800..0xdfff).

   All errors go through the errout label, which invokes
   STANDARD_FROM_LOOP_ERR_HANDLER (i) with i being the number of bytes
   examined so far.
   NOTE(review): inptr, inend, outptr, result and the error-handler
   macro are supplied by loop.c, which is not visible here.  */
948 #define BODY \
950 /* Next input byte. */ \
951 uint32_t ch = *inptr; \
953 if (__glibc_likely (ch < 0x80)) \
955 /* One byte sequence. */ \
956 ++inptr; \
958 else \
960 uint_fast32_t cnt; \
961 uint_fast32_t i; \
963 if (ch >= 0xc2 && ch < 0xe0) \
965 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
966 otherwise the wide character could have been represented \
967 using a single byte. */ \
968 cnt = 2; \
969 ch &= 0x1f; \
971 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
973 /* We expect three bytes. */ \
974 cnt = 3; \
975 ch &= 0x0f; \
977 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
979 /* We expect four bytes. */ \
980 cnt = 4; \
981 ch &= 0x07; \
983 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
985 /* We expect five bytes. */ \
986 cnt = 5; \
987 ch &= 0x03; \
989 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
991 /* We expect six bytes. */ \
992 cnt = 6; \
993 ch &= 0x01; \
995 else \
997 /* Search the end of this ill-formed UTF-8 character. This \
998 is the next byte with (x & 0xc0) != 0x80. */ \
999 i = 0; \
1000 do \
1001 ++i; \
1002 while (inptr + i < inend \
1003 && (*(inptr + i) & 0xc0) == 0x80 \
1004 && i < 5); \
1006 errout: \
1007 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1010 if (__glibc_unlikely (inptr + cnt > inend)) \
1012 /* We don't have enough input. But before we report that check \
1013 that all the bytes are correct. */ \
1014 for (i = 1; inptr + i < inend; ++i) \
1015 if ((inptr[i] & 0xc0) != 0x80) \
1016 break; \
1018 if (__glibc_likely (inptr + i == inend)) \
1020 result = __GCONV_INCOMPLETE_INPUT; \
1021 break; \
1024 goto errout; \
1027 /* Read the possible remaining bytes. */ \
1028 for (i = 1; i < cnt; ++i) \
1030 uint32_t byte = inptr[i]; \
1032 if ((byte & 0xc0) != 0x80) \
1033 /* This is an illegal encoding. */ \
1034 break; \
1036 ch <<= 6; \
1037 ch |= byte & 0x3f; \
1040 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1041 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1042 have been represented with fewer than cnt bytes. */ \
1043 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1044 /* Do not accept UTF-16 surrogates. */ \
1045 || (ch >= 0xd800 && ch <= 0xdfff)) \
1047 /* This is an illegal encoding. */ \
1048 goto errout; \
1051 inptr += cnt; \
1054 /* Now adjust the pointers and store the result. */ \
1055 *((uint32_t *) outptr) = ch; \
1056 outptr += sizeof (uint32_t); \
1058 #define LOOP_NEED_FLAGS
/* Save an incomplete UTF-8 sequence, running from *inptrp to inend, in
   the conversion state.  state->__count receives the number of bytes
   present (inend - *inptrp) in its low bits and the total length cnt of
   the sequence, derived from the lead byte, shifted left by 8.
   state->__value.__wch receives the value decoded so far, shifted left
   by six bits for every continuation byte that is still missing.
   *inptrp is advanced until it reaches inend.
   NOTE(review): state, inptrp and inend are supplied by the code that
   expands this macro, which is not visible here.  */
1060 #define STORE_REST \
1062 /* We store the remaining bytes while converting them into the UCS4 \
1063 format. We can assume that the first byte in the buffer is \
1064 correct and that it requires a larger number of bytes than there \
1065 are in the input buffer. */ \
1066 wint_t ch = **inptrp; \
1067 size_t cnt, r; \
1069 state->__count = inend - *inptrp; \
1071 assert (ch != 0xc0 && ch != 0xc1); \
1072 if (ch >= 0xc2 && ch < 0xe0) \
1074 /* We expect two bytes. The first byte cannot be 0xc0 or \
1075 0xc1, otherwise the wide character could have been \
1076 represented using a single byte. */ \
1077 cnt = 2; \
1078 ch &= 0x1f; \
1080 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
1082 /* We expect three bytes. */ \
1083 cnt = 3; \
1084 ch &= 0x0f; \
1086 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
1088 /* We expect four bytes. */ \
1089 cnt = 4; \
1090 ch &= 0x07; \
1092 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
1094 /* We expect five bytes. */ \
1095 cnt = 5; \
1096 ch &= 0x03; \
1098 else \
1100 /* We expect six bytes. */ \
1101 cnt = 6; \
1102 ch &= 0x01; \
1105 /* The first byte is already consumed. */ \
1106 r = cnt - 1; \
1107 while (++(*inptrp) < inend) \
1109 ch <<= 6; \
1110 ch |= **inptrp & 0x3f; \
1111 --r; \
1114 /* Shift for the so far missing bytes. */ \
1115 ch <<= r * 6; \
1117 /* Store the number of bytes expected for the entire sequence. */ \
1118 state->__count |= cnt << 8; \
1120 /* Store the value. */ \
1121 state->__value.__wch = ch; \
/* Rebuild into bytebuf the raw bytes of a partial UTF-8 sequence from
   the conversion state.  The total sequence length ntotal is taken
   from state->__count >> 8 and the number of bytes that were actually
   present, inlen, from state->__count & 255.  Walking from the last
   position down to 1, a continuation byte (0x80 plus six value bits) is
   written only for positions below inlen, while the value is shifted
   right by six bits for every position.  bytebuf[0] is the lead-byte
   marker inmask[ntotal - 2] combined with the value bits that remain.
   NOTE(review): state, bytebuf and inlen are supplied by the code that
   expands this macro, which is not visible here.  */
1124 #define UNPACK_BYTES \
1126 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1127 wint_t wch = state->__value.__wch; \
1128 size_t ntotal = state->__count >> 8; \
1130 inlen = state->__count & 255; \
1132 bytebuf[0] = inmask[ntotal - 2]; \
1134 do \
1136 if (--ntotal < inlen) \
1137 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1138 wch >>= 6; \
1140 while (ntotal > 1); \
1142 bytebuf[0] |= wch; \
/* Reset the conversion state by zeroing state->__count, which discards
   both the stored byte count and the expected sequence length.  */
1145 #define CLEAR_STATE \
1146 state->__count = 0
1149 #include <iconv/loop.c>
1150 #include <iconv/skeleton.c>
1153 /* Convert from UCS2 to the internal (UCS4-like) format. */
1154 #define DEFINE_INIT 0
1155 #define DEFINE_FINI 0
1156 #define MIN_NEEDED_FROM 2
1157 #define MIN_NEEDED_TO 4
1158 #define FROM_DIRECTION 1
1159 #define FROM_LOOP ucs2_internal_loop
1160 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1161 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1162 #define ONE_DIRECTION 1
1164 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1165 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1166 #define LOOPFCT FROM_LOOP
/* Per-character step, expanded inside <iconv/loop.c>: read one 16-bit
   unit with get16.  A surrogate (0xd800..0xdfff) is handed to
   STANDARD_FROM_LOOP_ERR_HANDLER with a length of 2; any other unit is
   stored as a 32-bit word at outptr, advancing inptr by two bytes and
   outptr by four.
   NOTE(review): get16 and the error-handler macro are defined outside
   the visible part of this file.  */
1167 #define BODY \
1169 uint16_t u1 = get16 (inptr); \
1171 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1173 /* Surrogate characters in UCS-2 input are not valid. Reject \
1174 them. (Catching this here is not security relevant.) */ \
1175 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1178 *((uint32_t *) outptr) = u1; \
1179 outptr += sizeof (uint32_t); \
1180 inptr += 2; \
1182 #define LOOP_NEED_FLAGS
1183 #include <iconv/loop.c>
1184 #include <iconv/skeleton.c>
1187 /* Convert from the internal (UCS4-like) format to UCS2. */
1188 #define DEFINE_INIT 0
1189 #define DEFINE_FINI 0
1190 #define MIN_NEEDED_FROM 4
1191 #define MIN_NEEDED_TO 2
1192 #define FROM_DIRECTION 1
1193 #define FROM_LOOP internal_ucs2_loop
1194 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1195 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1196 #define ONE_DIRECTION 1
1198 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1199 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1200 #define LOOPFCT FROM_LOOP
/* Per-character step, expanded inside <iconv/loop.c>.  For the 32-bit
   word at inptr:
     - 0x10000 and above: passed to UNICODE_TAG_HANDLER and then to
       STANDARD_TO_LOOP_ERR_HANDLER, both with a length of 4;
     - a surrogate (0xd800..0xdfff): result is set to
       __GCONV_ILLEGAL_INPUT; unless ignore_errors_p () holds the loop
       is left, otherwise the word is skipped and *irreversible is
       incremented;
     - anything else: written with put16, advancing outptr by two bytes
       and inptr by four.
   NOTE(review): result is assigned before the ignore_errors_p () test
   and therefore stays __GCONV_ILLEGAL_INPUT when the word is skipped,
   whereas the byte-swapping variant in this file assigns it only when
   leaving the loop -- confirm which behavior is intended.  */
1201 #define BODY \
1203 uint32_t val = *((const uint32_t *) inptr); \
1205 if (__glibc_unlikely (val >= 0x10000)) \
1207 UNICODE_TAG_HANDLER (val, 4); \
1208 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1210 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1212 /* Surrogate characters in UCS-4 input are not valid. \
1213 We must catch this, because the UCS-2 output might be \
1214 interpreted as UTF-16 by other programs. If we let \
1215 surrogates pass through, attackers could make a security \
1216 hole exploit by synthesizing any desired plane 1-16 \
1217 character. */ \
1218 result = __GCONV_ILLEGAL_INPUT; \
1219 if (! ignore_errors_p ()) \
1220 break; \
1221 inptr += 4; \
1222 ++*irreversible; \
1223 continue; \
1225 else \
1227 put16 (outptr, val); \
1228 outptr += sizeof (uint16_t); \
1229 inptr += 4; \
1232 #define LOOP_NEED_FLAGS
1233 #include <iconv/loop.c>
1234 #include <iconv/skeleton.c>
1237 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1238 #define DEFINE_INIT 0
1239 #define DEFINE_FINI 0
1240 #define MIN_NEEDED_FROM 2
1241 #define MIN_NEEDED_TO 4
1242 #define FROM_DIRECTION 1
1243 #define FROM_LOOP ucs2reverse_internal_loop
1244 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1245 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1246 #define ONE_DIRECTION 1
1248 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1249 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1250 #define LOOPFCT FROM_LOOP
/* Per-character step, expanded inside <iconv/loop.c>: read one 16-bit
   unit with get16 and byte-swap it with bswap_16.  For a surrogate
   (0xd800..0xdfff): unless ignore_errors_p () holds, result is set to
   __GCONV_ILLEGAL_INPUT and the loop is left; otherwise the unit is
   skipped and *irreversible is incremented.  Any other unit is stored
   as a 32-bit word at outptr, advancing inptr by two bytes and outptr
   by four.
   NOTE(review): the non-swapping UCS2 variant in this file routes the
   surrogate case through STANDARD_FROM_LOOP_ERR_HANDLER (2) instead of
   this hand-written sequence -- confirm the difference is intended.  */
1251 #define BODY \
1253 uint16_t u1 = bswap_16 (get16 (inptr)); \
1255 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1257 /* Surrogate characters in UCS-2 input are not valid. Reject \
1258 them. (Catching this here is not security relevant.) */ \
1259 if (! ignore_errors_p ()) \
1261 result = __GCONV_ILLEGAL_INPUT; \
1262 break; \
1264 inptr += 2; \
1265 ++*irreversible; \
1266 continue; \
1269 *((uint32_t *) outptr) = u1; \
1270 outptr += sizeof (uint32_t); \
1271 inptr += 2; \
1273 #define LOOP_NEED_FLAGS
1274 #include <iconv/loop.c>
1275 #include <iconv/skeleton.c>
1278 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1279 #define DEFINE_INIT 0
1280 #define DEFINE_FINI 0
1281 #define MIN_NEEDED_FROM 4
1282 #define MIN_NEEDED_TO 2
1283 #define FROM_DIRECTION 1
1284 #define FROM_LOOP internal_ucs2reverse_loop
1285 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1286 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1287 #define ONE_DIRECTION 1
1289 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1290 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1291 #define LOOPFCT FROM_LOOP
/* Per-character step, expanded inside <iconv/loop.c>.  For the 32-bit
   word at inptr:
     - 0x10000 and above: passed to UNICODE_TAG_HANDLER and then to
       STANDARD_TO_LOOP_ERR_HANDLER, both with a length of 4;
     - a surrogate (0xd800..0xdfff): unless ignore_errors_p () holds,
       result is set to __GCONV_ILLEGAL_INPUT and the loop is left;
       otherwise the word is skipped and *irreversible is incremented;
     - anything else: byte-swapped with bswap_16 and written with put16,
       advancing outptr by two bytes and inptr by four.
   NOTE(review): ignore_errors_p, put16 and the handler macros are
   defined outside the visible part of this file.  */
1292 #define BODY \
1294 uint32_t val = *((const uint32_t *) inptr); \
1295 if (__glibc_unlikely (val >= 0x10000)) \
1297 UNICODE_TAG_HANDLER (val, 4); \
1298 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1300 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1302 /* Surrogate characters in UCS-4 input are not valid. \
1303 We must catch this, because the UCS-2 output might be \
1304 interpreted as UTF-16 by other programs. If we let \
1305 surrogates pass through, attackers could make a security \
1306 hole exploit by synthesizing any desired plane 1-16 \
1307 character. */ \
1308 if (! ignore_errors_p ()) \
1310 result = __GCONV_ILLEGAL_INPUT; \
1311 break; \
1313 inptr += 4; \
1314 ++*irreversible; \
1315 continue; \
1317 else \
1319 put16 (outptr, bswap_16 (val)); \
1320 outptr += sizeof (uint16_t); \
1321 inptr += 4; \
1324 #define LOOP_NEED_FLAGS
1325 #include <iconv/loop.c>
1326 #include <iconv/skeleton.c>