malloc/Makefile: Split and sort tests
[glibc.git] / iconv / gconv_simple.c
blob257be2f8ff6a881153975d8748167bb20e911093
1 /* Simple transformations functions.
2 Copyright (C) 1997-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <byteswap.h>
20 #include <dlfcn.h>
21 #include <endian.h>
22 #include <errno.h>
23 #include <gconv.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <wchar.h>
28 #include <sys/param.h>
29 #include <gconv_int.h>
/* Helper macros for the include of "gconv_builtin.h" that follows.
   BUILTIN_ALIAS expands to nothing.  BUILTIN_TRANSFORMATION ignores every
   argument except Fct and expands to an extern prototype for the
   conversion function Fct.  */
#define BUILTIN_ALIAS(s1, s2) /* nothing */
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                               MinF, MaxF, MinT, MaxT) \
  extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
                  const unsigned char **, const unsigned char *, \
                  unsigned char **, size_t *, int, int);
37 #include "gconv_builtin.h"
/* Fall back to EINVAL when <errno.h> does not define EILSEQ.  */
#ifndef EILSEQ
# define EILSEQ EINVAL
#endif
/* Specialized conversion function for a single byte to INTERNAL, recognizing
   only ASCII characters.

   STEP is not used.  Returns C unchanged when it is below 0x80 and WEOF
   for every other byte value.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  if (c < 0x80)
    return c;
  else
    return WEOF;
}
/* Transform from the internal, UCS4-like format, to UCS4.  The only
   possible difference between the internal format and the real UCS4
   format is the endianness.  Unicode/ISO 10646 says that, unless some
   higher protocol specifies otherwise, the byte order is big endian.

   The macros below are the parameters for the <iconv/skeleton.c> include
   that follows the two loop functions.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
#define ONE_DIRECTION           0
/* Bulk conversion from the internal format to big-endian UCS4.

   Converts as many complete 4-byte units as fit in both buffers:
   [*INPTRP, INEND) is the input, [*OUTPTRP, OUTEND) the output.  On a
   little-endian host every unit is byte-swapped; on a big-endian host the
   data is copied unchanged.  *INPTRP and *OUTPTRP are advanced past the
   converted data.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t val = get32 (inptr);
      put32 (outptr, __builtin_bswap32 (val));
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianness is not supported."
#endif

  /* Determine the status.  The remaining output space is measured as a
     pointer difference so that no pointer beyond OUTEND is ever formed.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
/* Convert one character whose bytes may be split across calls
   (internal format to big-endian UCS4).

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Input bytes from *INPTRP are
   appended until four are available.  With fewer than four, the new count
   is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the four
   bytes are written to *OUTPTRP (reversed on a little-endian host),
   *OUTPTRP advances by four, the saved count is cleared and __GCONV_OK is
   returned.

   STEP, OUTEND and IRREVERSIBLE are not used.
   NOTE(review): the output space is not checked here; presumably the
   caller guarantees at least four bytes - confirm.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

#elif __BYTE_ORDER == __BIG_ENDIAN
  /* XXX unaligned */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianness is not supported."
#endif
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
166 #include <iconv/skeleton.c>
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.

   The macros below are the parameters for the <iconv/skeleton.c> include
   that follows the two loop functions.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal
#define ONE_DIRECTION           0
/* Bulk conversion from big-endian UCS4 to the internal format.

   Reads 4-byte units from [*INPTRP, INEND) while at least four bytes of
   input and four bytes of output space remain.  Each unit is converted to
   host order and stored at the output position.  Values above 0x7fffffff
   are not valid UCS4:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       without updating *INPTRP and *OUTPTRP;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the unit is
       skipped and *IRREVERSIBLE incremented;
     - otherwise *INPTRP is left at the offending unit, *OUTPTRP after
       the last stored unit, and __GCONV_ILLEGAL_INPUT is returned.

   After the loop the function returns __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Remaining sizes are compared as pointer differences so that no
     pointer beyond the end of either buffer is formed.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval = get32 (inptr);
#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = __builtin_bswap32 (inval);
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      put32 (outptr, inval);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
/* Convert one character whose bytes may be split across calls
   (big-endian UCS4 to the internal format).

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Input bytes from *INPTRP are
   appended until four are available.  With fewer than four, the new count
   is stored and __GCONV_INCOMPLETE_INPUT is returned.

   A complete unit is valid only if it does not exceed 0x7fffffff, i.e. if
   its most significant byte (the first one in big-endian order) is at
   most 0x7f.  For an invalid unit:
     - without __GCONV_IGNORE_ERRORS in STEP_DATA->__flags, the bytes
       consumed by this call are given back (*INPTRP is rewound) and
       __GCONV_ILLEGAL_INPUT is returned;
     - with it, nothing is written, the saved count is cleared and
       __GCONV_OK is returned.
   A valid unit is written to *OUTPTRP in host byte order, *OUTPTRP
   advances by four, the saved count is cleared and __GCONV_OK is
   returned.

   STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The limit is 0x7f, not 0x80: a leading byte of 0x80 already means a
     value above 0x7fffffff, which the bulk loop rejects as well.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input by this call;
             state->__count still holds the count from before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
305 #include <iconv/skeleton.c>
/* Transform from the internal format to little-endian UCS4.

   The macros below are the parameters for the <iconv/skeleton.c> include
   that follows the two loop functions.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le
#define ONE_DIRECTION           0
/* Bulk conversion from the internal format to little-endian UCS4.

   Converts as many complete 4-byte units as fit in both buffers:
   [*INPTRP, INEND) is the input, [*OUTPTRP, OUTEND) the output.  On a
   big-endian host every unit is byte-swapped; on a little-endian host the
   data is copied unchanged.  *INPTRP and *OUTPTRP are advanced past the
   converted data.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t val = get32 (inptr);
      put32 (outptr, __builtin_bswap32 (val));
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianness is not supported."
#endif

  /* Determine the status.  The remaining output space is measured as a
     pointer difference so that no pointer beyond OUTEND is ever formed.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
/* Convert one character whose bytes may be split across calls
   (internal format to little-endian UCS4).

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Input bytes from *INPTRP are
   appended until four are available.  With fewer than four, the new count
   is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the four
   bytes are written to *OUTPTRP (reversed on a big-endian host),
   *OUTPTRP advances by four, the saved count is cleared and __GCONV_OK is
   returned.

   STEP, OUTEND and IRREVERSIBLE are not used.
   NOTE(review): the output space is not checked here; presumably the
   caller guarantees at least four bytes - confirm.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];

#else
  /* XXX unaligned */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];

#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
413 #include <iconv/skeleton.c>
/* And finally from UCS4-LE to the internal encoding.

   The macros below are the parameters for the <iconv/skeleton.c> include
   that follows the two loop functions.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal
#define ONE_DIRECTION           0
/* Bulk conversion from little-endian UCS4 to the internal format.

   Reads 4-byte units from [*INPTRP, INEND) while at least four bytes of
   input and four bytes of output space remain.  Each unit is converted to
   host order and stored at the output position.  Values above 0x7fffffff
   are not valid UCS4:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       without updating *INPTRP and *OUTPTRP;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the unit is
       skipped and *IRREVERSIBLE incremented;
     - otherwise *INPTRP is left at the offending unit, *OUTPTRP after
       the last stored unit, and __GCONV_ILLEGAL_INPUT is returned.

   After the loop the function returns __GCONV_EMPTY_INPUT when the input
   is exhausted, __GCONV_INCOMPLETE_INPUT when fewer than four input bytes
   remain, and __GCONV_FULL_OUTPUT otherwise.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Remaining sizes are compared as pointer differences so that no
     pointer beyond the end of either buffer is formed.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval = get32 (inptr);
#if __BYTE_ORDER == __BIG_ENDIAN
      inval = __builtin_bswap32 (inval);
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      put32 (outptr, inval);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      /* The loop can only have stopped for lack of output space.  */
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
/* Convert one character whose bytes may be split across calls
   (little-endian UCS4 to the internal format).

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Input bytes from *INPTRP are
   appended until four are available.  With fewer than four, the new count
   is stored and __GCONV_INCOMPLETE_INPUT is returned.

   A complete unit is valid only if it does not exceed 0x7fffffff, i.e. if
   its most significant byte (the last one in little-endian order) is at
   most 0x7f.  For an invalid unit:
     - without __GCONV_IGNORE_ERRORS in STEP_DATA->__flags, the bytes
       consumed by this call are given back (*INPTRP is rewound, as the
       big-endian counterpart does) and __GCONV_ILLEGAL_INPUT is
       returned;
     - with it, nothing is written, the saved count is cleared and
       __GCONV_OK is returned.
   A valid unit is written to *OUTPTRP in host byte order, *OUTPTRP
   advances by four, the saved count is cleared and __GCONV_OK is
   returned.

   STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The limit is 0x7f, not 0x80: a top byte of 0x80 already means a
     value above 0x7fffffff, which the bulk loop rejects as well.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input by this call;
             state->__count still holds the count from before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
551 #include <iconv/skeleton.c>
/* Convert from ISO 646-IRV to the internal (UCS4-like) format.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr, outptr
   and result, which it expects at the expansion site.  A byte above 0x7f
   is handed to STANDARD_FROM_LOOP_ERR_HANDLER; any other byte is stored
   as one 32-bit unit.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ascii_internal_loop
#define TO_LOOP                 ascii_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ascii_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    if (__glibc_unlikely (*inptr > '\x7f')) \
      { \
        /* The value is too large.  We don't try transliteration here since \
           this is not an error because of the lack of possibilities to \
           represent the result.  This is a genuine bug in the input since \
           ASCII does not allow such values.  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    else \
      { \
        /* It's an one byte sequence.  */ \
        *((uint32_t *) outptr) = *inptr++; \
        outptr += sizeof (uint32_t); \
      } \
  }
#define LOOP_NEED_FLAGS
586 #include <iconv/loop.c>
587 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to ISO 646-IRV.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr and
   outptr, which it expects at the expansion site.  A 32-bit unit above
   0x7f is passed to UNICODE_TAG_HANDLER and then to
   STANDARD_TO_LOOP_ERR_HANDLER; any other unit is stored as one byte.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ascii_loop
#define TO_LOOP                 internal_ascii_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ascii
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
      { \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else \
      { \
        /* It's an one byte sequence.  */ \
        *outptr++ = *((const uint32_t *) inptr); \
        inptr += sizeof (uint32_t); \
      } \
  }
#define LOOP_NEED_FLAGS
619 #include <iconv/loop.c>
620 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UTF-8.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr, outptr,
   outend and result, which it expects at the expansion site.

   Values below 0x80 are stored as one byte.  Other values up to
   0x7fffffff, excluding the surrogate range 0xd800..0xdfff, are encoded
   in two to six bytes; if the sequence does not fit before outend, result
   becomes __GCONV_FULL_OUTPUT and the loop is left without consuming the
   input unit.  Everything else goes to STANDARD_TO_LOOP_ERR_HANDLER.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define MAX_NEEDED_TO           6
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_utf8_loop
#define TO_LOOP                 internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_utf8
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    uint32_t wc = *((const uint32_t *) inptr); \
 \
    if (__glibc_likely (wc < 0x80)) \
      /* It's an one byte sequence.  */ \
      *outptr++ = (unsigned char) wc; \
    else if (__glibc_likely (wc <= 0x7fffffff \
                             && (wc < 0xd800 || wc > 0xdfff))) \
      { \
        size_t step; \
        unsigned char *start; \
 \
        /* A sequence of STEP bytes carries 5 * STEP + 1 value bits.  */ \
        for (step = 2; step < 6; ++step) \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
            break; \
 \
        if (__glibc_unlikely (outptr + step > outend)) \
          { \
            /* Too long.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
 \
        start = outptr; \
        /* Lead byte marker: STEP one bits followed by a zero bit.  */ \
        *outptr = (unsigned char) (~0xff >> step); \
        outptr += step; \
        do \
          { \
            start[--step] = 0x80 | (wc & 0x3f); \
            wc >>= 6; \
          } \
        while (step > 1); \
        start[0] |= wc; \
      } \
    else \
      { \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
 \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
682 #include <iconv/loop.c>
683 #include <iconv/skeleton.c>
/* Convert from UTF-8 to the internal (UCS4-like) format.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr, inend,
   outptr and result, which it expects at the expansion site.

   The lead byte selects the sequence length (one to six bytes).  The
   sequence is passed to STANDARD_FROM_LOOP_ERR_HANDLER when the lead byte
   is invalid (0x80..0xc1, 0xfe, 0xff), when a trail byte is not of the
   form 10xxxxxx, when a sequence of three or more bytes encodes a value
   that needs fewer bytes, or when the value is a UTF-16 surrogate.  If the
   input ends inside an otherwise well-formed sequence, result becomes
   __GCONV_INCOMPLETE_INPUT and the loop is left.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    /* Next input byte.  */ \
    uint32_t ch = *inptr; \
 \
    if (__glibc_likely (ch < 0x80)) \
      { \
        /* One byte sequence.  */ \
        ++inptr; \
      } \
    else \
      { \
        unsigned int cnt; \
        unsigned int i; \
 \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1, \
               otherwise the wide character could have been represented \
               using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
          { \
            /* We expect five bytes.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
          { \
            /* We expect six bytes.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  */ \
            i = 0; \
            do \
              ++i; \
            while (inptr + i < inend \
                   && (*(inptr + i) & 0xc0) == 0x80 \
                   && i < 5); \
 \
          errout: \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
 \
        if (__glibc_unlikely (inptr + cnt > inend)) \
          { \
            /* We don't have enough input.  But before we report that check \
               that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
 \
            if (__glibc_likely (inptr + i == inend)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
 \
            goto errout; \
          } \
 \
        /* Read the possible remaining bytes.  */ \
        for (i = 1; i < cnt; ++i) \
          { \
            uint32_t byte = inptr[i]; \
 \
            if ((byte & 0xc0) != 0x80) \
              /* This is an illegal encoding.  */ \
              break; \
 \
            ch <<= 6; \
            ch |= byte & 0x3f; \
          } \
 \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
           have been represented with fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
            /* Do not accept UTF-16 surrogates.  */ \
            || (ch >= 0xd800 && ch <= 0xdfff)) \
          { \
            /* This is an illegal encoding.  */ \
            goto errout; \
          } \
 \
        inptr += cnt; \
      } \
 \
    /* Now adjust the pointers and store the result.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
/* Save an incomplete UTF-8 sequence in the conversion state.  Uses the
   names state, inptrp and inend, which it expects at the expansion site.

   On exit *inptrp equals inend, the low byte of state->__count holds the
   number of bytes that were available, bits 8 and up hold the total
   length announced by the lead byte, and state->__value.__wch holds the
   value bits seen so far, shifted left as if the missing trail bytes
   were zero.  */
#define STORE_REST \
  { \
    /* We store the remaining bytes while converting them into the UCS4 \
       format.  We can assume that the first byte in the buffer is \
       correct and that it requires a larger number of bytes than there \
       are in the input buffer.  */ \
    wint_t ch = **inptrp; \
    size_t cnt, r; \
 \
    state->__count = inend - *inptrp; \
 \
    assert (ch != 0xc0 && ch != 0xc1); \
    if (ch >= 0xc2 && ch < 0xe0) \
      { \
        /* We expect two bytes.  The first byte cannot be 0xc0 or \
           0xc1, otherwise the wide character could have been \
           represented using a single byte.  */ \
        cnt = 2; \
        ch &= 0x1f; \
      } \
    else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
      { \
        /* We expect three bytes.  */ \
        cnt = 3; \
        ch &= 0x0f; \
      } \
    else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
      { \
        /* We expect four bytes.  */ \
        cnt = 4; \
        ch &= 0x07; \
      } \
    else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
      { \
        /* We expect five bytes.  */ \
        cnt = 5; \
        ch &= 0x03; \
      } \
    else \
      { \
        /* We expect six bytes.  */ \
        cnt = 6; \
        ch &= 0x01; \
      } \
 \
    /* The first byte is already consumed.  */ \
    r = cnt - 1; \
    while (++(*inptrp) < inend) \
      { \
        ch <<= 6; \
        ch |= **inptrp & 0x3f; \
        --r; \
      } \
 \
    /* Shift for the so far missing bytes.  */ \
    ch <<= r * 6; \
 \
    /* Store the number of bytes expected for the entire sequence.  */ \
    state->__count |= cnt << 8; \
 \
    /* Store the value.  */ \
    state->__value.__wch = ch; \
  }
/* Rebuild the bytes of a saved incomplete UTF-8 sequence from the
   conversion state.  Uses the names state, inlen and bytebuf, which it
   expects at the expansion site.

   inlen is set to the low byte of state->__count.  bytebuf[0] receives the
   lead byte for the total length stored in bits 8 and up, and
   bytebuf[1 .. inlen-1] the trail bytes recovered from
   state->__value.__wch.  The total length must be between 2 and 6,
   otherwise the inmask lookup is out of range.  */
#define UNPACK_BYTES \
  { \
    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t wch = state->__value.__wch; \
    size_t ntotal = state->__count >> 8; \
 \
    inlen = state->__count & 255; \
 \
    bytebuf[0] = inmask[ntotal - 2]; \
 \
    do \
      { \
        /* Only positions that were actually read get a trail byte.  */ \
        if (--ntotal < inlen) \
          bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
        wch >>= 6; \
      } \
    while (ntotal > 1); \
 \
    bytebuf[0] |= wch; \
  }
/* Forget any saved partial sequence.  Uses the name state, which it
   expects at the expansion site.  */
#define CLEAR_STATE \
  state->__count = 0
903 #include <iconv/loop.c>
904 #include <iconv/skeleton.c>
/* Convert from UCS2 to the internal (UCS4-like) format.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr and
   outptr, which it expects at the expansion site.  A 16-bit unit in the
   surrogate range 0xd800..0xdfff goes to STANDARD_FROM_LOOP_ERR_HANDLER;
   any other unit is widened to 32 bits and stored.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    uint16_t u1 = get16 (inptr); \
 \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
 \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
#define LOOP_NEED_FLAGS
937 #include <iconv/loop.c>
938 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UCS2.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr, outptr,
   result and irreversible, which it expects at the expansion site.

   Values of 0x10000 and above are passed to UNICODE_TAG_HANDLER and then
   to STANDARD_TO_LOOP_ERR_HANDLER.  Surrogates (0xd800..0xdfff) set
   result to __GCONV_ILLEGAL_INPUT; the loop is left unless
   ignore_errors_p () is true, in which case the unit is skipped and
   *irreversible incremented.  Every other value is stored as 16 bits.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
 \
    if (__glibc_unlikely (val >= 0x10000)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could make a security \
           hole exploit by synthesizing any desired plane 1-16 \
           character.  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        put16 (outptr, val); \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
#define LOOP_NEED_FLAGS
987 #include <iconv/loop.c>
988 #include <iconv/skeleton.c>
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr, outptr,
   result and irreversible, which it expects at the expansion site.

   Each 16-bit unit is byte-swapped first.  A surrogate (0xd800..0xdfff)
   sets result to __GCONV_ILLEGAL_INPUT; the loop is left unless
   ignore_errors_p () is true, in which case the unit is skipped and
   *irreversible incremented.  result is set before the test so that a
   skipped error is still reported, matching the explicit surrogate
   handling used for internal -> UCS2 in the native byte order.  Every
   other unit is widened to 32 bits and stored.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    uint16_t u1 = bswap_16 (get16 (inptr)); \
 \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        inptr += 2; \
        ++*irreversible; \
        continue; \
      } \
 \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
#define LOOP_NEED_FLAGS
1028 #include <iconv/loop.c>
1029 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness.

   BODY is one iteration of the conversion loop generated by the
   <iconv/loop.c> include that follows.  It uses the names inptr, outptr,
   result and irreversible, which it expects at the expansion site.

   Values of 0x10000 and above are passed to UNICODE_TAG_HANDLER and then
   to STANDARD_TO_LOOP_ERR_HANDLER.  Surrogates (0xd800..0xdfff) set
   result to __GCONV_ILLEGAL_INPUT; the loop is left unless
   ignore_errors_p () is true, in which case the unit is skipped and
   *irreversible incremented.  result is set before the test so that a
   skipped error is still reported, matching the native-byte-order
   variant.  Every other value is stored as a byte-swapped 16-bit
   unit.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    if (__glibc_unlikely (val >= 0x10000)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could make a security \
           hole exploit by synthesizing any desired plane 1-16 \
           character.  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        put16 (outptr, bswap_16 (val)); \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
#define LOOP_NEED_FLAGS
1079 #include <iconv/loop.c>
1080 #include <iconv/skeleton.c>