1 /* Simple transformations functions.
2 Copyright (C) 1997-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
28 #include <sys/param.h>
29 #include <gconv_int.h>
/* Generate a prototype for every built-in conversion function.  Alias
   entries contribute nothing, so BUILTIN_ALIAS expands to an empty
   token sequence.  */
31 #define BUILTIN_ALIAS(s1, s2) /* nothing */
/* Each transformation entry expands to an extern declaration of its
   conversion function Fct.  All other arguments (From, To, Cost, Name,
   BtowcFct and the MinF/MaxF/MinT/MaxT limits) are ignored by this
   expansion.  */
32 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
33 MinF, MaxF, MinT, MaxT) \
34 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
35 const unsigned char **, const unsigned char *, \
36 unsigned char **, size_t *, int, int);
/* Presumably the list of BUILTIN_ALIAS / BUILTIN_TRANSFORMATION
   invocations; with the definitions above, including it yields only
   the prototypes -- confirm against gconv_builtin.h.  */
37 #include "gconv_builtin.h"
41 # define EILSEQ EINVAL
45 /* Specialized conversion function for a single byte to INTERNAL, recognizing
46 only ASCII characters. */
48 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
57 /* Transform from the internal, UCS4-like format to UCS4. The only
58 difference, if any, between the internal UCS4 format and the real
59 UCS4 format is the endianness. Unicode/ISO 10646 says that, unless
60 some higher-level protocol specifies otherwise, the byte
61 order is big endian. */
64 #define MIN_NEEDED_FROM 4
65 #define MIN_NEEDED_TO 4
66 #define FROM_DIRECTION 1
67 #define FROM_LOOP internal_ucs4_loop
68 #define TO_LOOP internal_ucs4_loop /* This is not used. */
69 #define FUNCTION_NAME __gconv_transform_internal_ucs4
70 #define ONE_DIRECTION 0
74 __attribute ((always_inline
))
75 internal_ucs4_loop (struct __gconv_step
*step
,
76 struct __gconv_step_data
*step_data
,
77 const unsigned char **inptrp
, const unsigned char *inend
,
78 unsigned char **outptrp
, const unsigned char *outend
,
81 const unsigned char *inptr
= *inptrp
;
82 unsigned char *outptr
= *outptrp
;
83 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
86 #if __BYTE_ORDER == __LITTLE_ENDIAN
87 /* The host is little endian but UCS4 is big endian, so every 32-bit value must be byte-swapped. */
90 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
92 uint32_t val
= get32 (inptr
);
93 put32 (outptr
, __builtin_bswap32 (val
));
98 #elif __BYTE_ORDER == __BIG_ENDIAN
99 /* Simply copy the data. */
100 *inptrp
= inptr
+ n_convert
* 4;
101 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
103 # error "This endianness is not supported."
106 /* Determine the status. */
107 if (*inptrp
== inend
)
108 result
= __GCONV_EMPTY_INPUT
;
109 else if (*outptrp
+ 4 > outend
)
110 result
= __GCONV_FULL_OUTPUT
;
112 result
= __GCONV_INCOMPLETE_INPUT
;
119 __attribute ((always_inline
))
120 internal_ucs4_loop_single (struct __gconv_step
*step
,
121 struct __gconv_step_data
*step_data
,
122 const unsigned char **inptrp
,
123 const unsigned char *inend
,
124 unsigned char **outptrp
,
125 const unsigned char *outend
,
126 size_t *irreversible
)
128 mbstate_t *state
= step_data
->__statep
;
129 size_t cnt
= state
->__count
& 7;
131 while (*inptrp
< inend
&& cnt
< 4)
132 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
134 if (__glibc_unlikely (cnt
< 4))
136 /* Still not enough bytes. The ones read so far are already in the state; record their count. */
137 state
->__count
&= ~7;
138 state
->__count
|= cnt
;
140 return __GCONV_INCOMPLETE_INPUT
;
143 #if __BYTE_ORDER == __LITTLE_ENDIAN
144 (*outptrp
)[0] = state
->__value
.__wchb
[3];
145 (*outptrp
)[1] = state
->__value
.__wchb
[2];
146 (*outptrp
)[2] = state
->__value
.__wchb
[1];
147 (*outptrp
)[3] = state
->__value
.__wchb
[0];
149 #elif __BYTE_ORDER == __BIG_ENDIAN
151 (*outptrp
)[0] = state
->__value
.__wchb
[0];
152 (*outptrp
)[1] = state
->__value
.__wchb
[1];
153 (*outptrp
)[2] = state
->__value
.__wchb
[2];
154 (*outptrp
)[3] = state
->__value
.__wchb
[3];
156 # error "This endianness is not supported."
160 /* Clear the state buffer. */
161 state
->__count
&= ~7;
166 #include <iconv/skeleton.c>
169 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
170 for the other direction we have to check for correct values here. */
171 #define DEFINE_INIT 0
172 #define DEFINE_FINI 0
173 #define MIN_NEEDED_FROM 4
174 #define MIN_NEEDED_TO 4
175 #define FROM_DIRECTION 1
176 #define FROM_LOOP ucs4_internal_loop
177 #define TO_LOOP ucs4_internal_loop /* This is not used. */
178 #define FUNCTION_NAME __gconv_transform_ucs4_internal
179 #define ONE_DIRECTION 0
183 __attribute ((always_inline
))
184 ucs4_internal_loop (struct __gconv_step
*step
,
185 struct __gconv_step_data
*step_data
,
186 const unsigned char **inptrp
, const unsigned char *inend
,
187 unsigned char **outptrp
, const unsigned char *outend
,
188 size_t *irreversible
)
190 int flags
= step_data
->__flags
;
191 const unsigned char *inptr
= *inptrp
;
192 unsigned char *outptr
= *outptrp
;
195 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
197 uint32_t inval
= get32 (inptr
);
198 #if __BYTE_ORDER == __LITTLE_ENDIAN
199 inval
= __builtin_bswap32 (inval
);
202 if (__glibc_unlikely (inval
> 0x7fffffff))
204 /* The value is too large. We do not attempt transliteration here:
205 the problem is not that the result cannot be represented in the
206 target, it is a genuine error in the input, since
207 UCS4 does not allow such values. */
208 if (irreversible
== NULL
)
209 /* We are transliterating, don't try to correct anything. */
210 return __GCONV_ILLEGAL_INPUT
;
212 if (flags
& __GCONV_IGNORE_ERRORS
)
214 /* Just ignore this character. */
221 return __GCONV_ILLEGAL_INPUT
;
224 put32 (outptr
, inval
);
225 outptr
+= sizeof (uint32_t);
231 /* Determine the status. */
232 if (*inptrp
== inend
)
233 result
= __GCONV_EMPTY_INPUT
;
234 else if (*outptrp
+ 4 > outend
)
235 result
= __GCONV_FULL_OUTPUT
;
237 result
= __GCONV_INCOMPLETE_INPUT
;
244 __attribute ((always_inline
))
245 ucs4_internal_loop_single (struct __gconv_step
*step
,
246 struct __gconv_step_data
*step_data
,
247 const unsigned char **inptrp
,
248 const unsigned char *inend
,
249 unsigned char **outptrp
,
250 const unsigned char *outend
,
251 size_t *irreversible
)
253 mbstate_t *state
= step_data
->__statep
;
254 int flags
= step_data
->__flags
;
255 size_t cnt
= state
->__count
& 7;
257 while (*inptrp
< inend
&& cnt
< 4)
258 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
260 if (__glibc_unlikely (cnt
< 4))
262 /* Still not enough bytes. The ones read so far are already in the state; record their count. */
263 state
->__count
&= ~7;
264 state
->__count
|= cnt
;
266 return __GCONV_INCOMPLETE_INPUT
;
269 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x80,
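272 /* The value is too large. We do not attempt transliteration here:
273 the problem is not that the result cannot be represented in the
274 target, it is a genuine error in the input, since
275 UCS4 does not allow such values.
NOTE(review): ucs4_internal_loop rejects inval > 0x7fffffff, i.e. a
leading byte >= 0x80, whereas the test above only rejects a leading
byte > 0x80 -- confirm whether 0x80 should be rejected here too. */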
276 if (!(flags
& __GCONV_IGNORE_ERRORS
))
278 *inptrp
-= cnt
- (state
->__count
& 7);
279 return __GCONV_ILLEGAL_INPUT
;
284 #if __BYTE_ORDER == __LITTLE_ENDIAN
285 (*outptrp
)[0] = state
->__value
.__wchb
[3];
286 (*outptrp
)[1] = state
->__value
.__wchb
[2];
287 (*outptrp
)[2] = state
->__value
.__wchb
[1];
288 (*outptrp
)[3] = state
->__value
.__wchb
[0];
289 #elif __BYTE_ORDER == __BIG_ENDIAN
290 (*outptrp
)[0] = state
->__value
.__wchb
[0];
291 (*outptrp
)[1] = state
->__value
.__wchb
[1];
292 (*outptrp
)[2] = state
->__value
.__wchb
[2];
293 (*outptrp
)[3] = state
->__value
.__wchb
[3];
299 /* Clear the state buffer. */
300 state
->__count
&= ~7;
305 #include <iconv/skeleton.c>
308 /* Similarly for the little endian form. */
309 #define DEFINE_INIT 0
310 #define DEFINE_FINI 0
311 #define MIN_NEEDED_FROM 4
312 #define MIN_NEEDED_TO 4
313 #define FROM_DIRECTION 1
314 #define FROM_LOOP internal_ucs4le_loop
315 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
316 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
317 #define ONE_DIRECTION 0
321 __attribute ((always_inline
))
322 internal_ucs4le_loop (struct __gconv_step
*step
,
323 struct __gconv_step_data
*step_data
,
324 const unsigned char **inptrp
, const unsigned char *inend
,
325 unsigned char **outptrp
, const unsigned char *outend
,
326 size_t *irreversible
)
328 const unsigned char *inptr
= *inptrp
;
329 unsigned char *outptr
= *outptrp
;
330 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
333 #if __BYTE_ORDER == __BIG_ENDIAN
334 /* The host is big endian but this is the little-endian UCS4 form, so every 32-bit value must be byte-swapped. */
337 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
339 uint32_t val
= get32 (inptr
);
340 put32 (outptr
, __builtin_bswap32 (val
));
345 #elif __BYTE_ORDER == __LITTLE_ENDIAN
346 /* Simply copy the data. */
347 *inptrp
= inptr
+ n_convert
* 4;
348 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
350 # error "This endianness is not supported."
353 /* Determine the status. */
354 if (*inptrp
== inend
)
355 result
= __GCONV_EMPTY_INPUT
;
356 else if (*outptrp
+ 4 > outend
)
357 result
= __GCONV_FULL_OUTPUT
;
359 result
= __GCONV_INCOMPLETE_INPUT
;
366 __attribute ((always_inline
))
367 internal_ucs4le_loop_single (struct __gconv_step
*step
,
368 struct __gconv_step_data
*step_data
,
369 const unsigned char **inptrp
,
370 const unsigned char *inend
,
371 unsigned char **outptrp
,
372 const unsigned char *outend
,
373 size_t *irreversible
)
375 mbstate_t *state
= step_data
->__statep
;
376 size_t cnt
= state
->__count
& 7;
378 while (*inptrp
< inend
&& cnt
< 4)
379 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
381 if (__glibc_unlikely (cnt
< 4))
383 /* Still not enough bytes. The ones read so far are already in the state; record their count. */
384 state
->__count
&= ~7;
385 state
->__count
|= cnt
;
387 return __GCONV_INCOMPLETE_INPUT
;
390 #if __BYTE_ORDER == __BIG_ENDIAN
391 (*outptrp
)[0] = state
->__value
.__wchb
[3];
392 (*outptrp
)[1] = state
->__value
.__wchb
[2];
393 (*outptrp
)[2] = state
->__value
.__wchb
[1];
394 (*outptrp
)[3] = state
->__value
.__wchb
[0];
398 (*outptrp
)[0] = state
->__value
.__wchb
[0];
399 (*outptrp
)[1] = state
->__value
.__wchb
[1];
400 (*outptrp
)[2] = state
->__value
.__wchb
[2];
401 (*outptrp
)[3] = state
->__value
.__wchb
[3];
407 /* Clear the state buffer. */
408 state
->__count
&= ~7;
413 #include <iconv/skeleton.c>
416 /* And finally from UCS4-LE to the internal encoding. */
417 #define DEFINE_INIT 0
418 #define DEFINE_FINI 0
419 #define MIN_NEEDED_FROM 4
420 #define MIN_NEEDED_TO 4
421 #define FROM_DIRECTION 1
422 #define FROM_LOOP ucs4le_internal_loop
423 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
424 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
425 #define ONE_DIRECTION 0
429 __attribute ((always_inline
))
430 ucs4le_internal_loop (struct __gconv_step
*step
,
431 struct __gconv_step_data
*step_data
,
432 const unsigned char **inptrp
, const unsigned char *inend
,
433 unsigned char **outptrp
, const unsigned char *outend
,
434 size_t *irreversible
)
436 int flags
= step_data
->__flags
;
437 const unsigned char *inptr
= *inptrp
;
438 unsigned char *outptr
= *outptrp
;
441 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
443 uint32_t inval
= get32 (inptr
);
444 #if __BYTE_ORDER == __BIG_ENDIAN
445 inval
= __builtin_bswap32 (inval
);
448 if (__glibc_unlikely (inval
> 0x7fffffff))
450 /* The value is too large. We do not attempt transliteration here:
451 the problem is not that the result cannot be represented in the
452 target, it is a genuine error in the input, since
453 UCS4 does not allow such values. */
454 if (irreversible
== NULL
)
455 /* We are transliterating, don't try to correct anything. */
456 return __GCONV_ILLEGAL_INPUT
;
458 if (flags
& __GCONV_IGNORE_ERRORS
)
460 /* Just ignore this character. */
467 return __GCONV_ILLEGAL_INPUT
;
470 put32 (outptr
, inval
);
471 outptr
+= sizeof (uint32_t);
477 /* Determine the status. */
478 if (*inptrp
== inend
)
479 result
= __GCONV_EMPTY_INPUT
;
480 else if (*inptrp
+ 4 > inend
)
481 result
= __GCONV_INCOMPLETE_INPUT
;
484 assert (*outptrp
+ 4 > outend
);
485 result
= __GCONV_FULL_OUTPUT
;
493 __attribute ((always_inline
))
494 ucs4le_internal_loop_single (struct __gconv_step
*step
,
495 struct __gconv_step_data
*step_data
,
496 const unsigned char **inptrp
,
497 const unsigned char *inend
,
498 unsigned char **outptrp
,
499 const unsigned char *outend
,
500 size_t *irreversible
)
502 mbstate_t *state
= step_data
->__statep
;
503 int flags
= step_data
->__flags
;
504 size_t cnt
= state
->__count
& 7;
506 while (*inptrp
< inend
&& cnt
< 4)
507 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
509 if (__glibc_unlikely (cnt
< 4))
511 /* Still not enough bytes. The ones read so far are already in the state; record their count. */
512 state
->__count
&= ~7;
513 state
->__count
|= cnt
;
515 return __GCONV_INCOMPLETE_INPUT
;
518 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x80,
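521 /* The value is too large. We do not attempt transliteration here:
522 the problem is not that the result cannot be represented in the
523 target, it is a genuine error in the input, since
524 UCS4 does not allow such values.
NOTE(review): ucs4le_internal_loop rejects inval > 0x7fffffff, i.e. a
most significant byte >= 0x80, whereas the test above only rejects a
most significant byte > 0x80 -- confirm whether 0x80 should be rejected
here too.  Also, unlike ucs4_internal_loop_single, *inptrp is not moved
back before __GCONV_ILLEGAL_INPUT is returned -- confirm that this is
intended. */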
525 if (!(flags
& __GCONV_IGNORE_ERRORS
))
526 return __GCONV_ILLEGAL_INPUT
;
530 #if __BYTE_ORDER == __BIG_ENDIAN
531 (*outptrp
)[0] = state
->__value
.__wchb
[3];
532 (*outptrp
)[1] = state
->__value
.__wchb
[2];
533 (*outptrp
)[2] = state
->__value
.__wchb
[1];
534 (*outptrp
)[3] = state
->__value
.__wchb
[0];
536 (*outptrp
)[0] = state
->__value
.__wchb
[0];
537 (*outptrp
)[1] = state
->__value
.__wchb
[1];
538 (*outptrp
)[2] = state
->__value
.__wchb
[2];
539 (*outptrp
)[3] = state
->__value
.__wchb
[3];
545 /* Clear the state buffer. */
546 state
->__count
&= ~7;
551 #include <iconv/skeleton.c>
554 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
555 #define DEFINE_INIT 0
556 #define DEFINE_FINI 0
557 #define MIN_NEEDED_FROM 1
558 #define MIN_NEEDED_TO 4
559 #define FROM_DIRECTION 1
560 #define FROM_LOOP ascii_internal_loop
561 #define TO_LOOP ascii_internal_loop /* This is not used. */
562 #define FUNCTION_NAME __gconv_transform_ascii_internal
563 #define ONE_DIRECTION 1
565 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
566 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
567 #define LOOPFCT FROM_LOOP
570 if (__glibc_unlikely (*inptr > '\x7f')) \
572 /* The value is too large. We don't try transliteration here since \
573 this is not an error because of the lack of possibilities to \
574 represent the result. This is a genuine bug in the input since \
575 ASCII does not allow such values. */ \
576 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
580 /* It's a one-byte sequence. */ \
581 *((uint32_t *) outptr) = *inptr++; \
582 outptr += sizeof (uint32_t); \
585 #define LOOP_NEED_FLAGS
586 #include <iconv/loop.c>
587 #include <iconv/skeleton.c>
590 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
591 #define DEFINE_INIT 0
592 #define DEFINE_FINI 0
593 #define MIN_NEEDED_FROM 4
594 #define MIN_NEEDED_TO 1
595 #define FROM_DIRECTION 1
596 #define FROM_LOOP internal_ascii_loop
597 #define TO_LOOP internal_ascii_loop /* This is not used. */
598 #define FUNCTION_NAME __gconv_transform_internal_ascii
599 #define ONE_DIRECTION 1
601 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
602 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
603 #define LOOPFCT FROM_LOOP
606 if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
608 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
609 STANDARD_TO_LOOP_ERR_HANDLER (4); \
613 /* It's a one-byte sequence. */ \
614 *outptr++ = *((const uint32_t *) inptr); \
615 inptr += sizeof (uint32_t); \
618 #define LOOP_NEED_FLAGS
619 #include <iconv/loop.c>
620 #include <iconv/skeleton.c>
623 /* Convert from the internal (UCS4-like) format to UTF-8. */
624 #define DEFINE_INIT 0
625 #define DEFINE_FINI 0
626 #define MIN_NEEDED_FROM 4
627 #define MIN_NEEDED_TO 1
628 #define MAX_NEEDED_TO 6
629 #define FROM_DIRECTION 1
630 #define FROM_LOOP internal_utf8_loop
631 #define TO_LOOP internal_utf8_loop /* This is not used. */
632 #define FUNCTION_NAME __gconv_transform_internal_utf8
633 #define ONE_DIRECTION 1
635 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
636 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
637 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
638 #define LOOPFCT FROM_LOOP
641 uint32_t wc = *((const uint32_t *) inptr); \
643 if (__glibc_likely (wc < 0x80)) \
644 /* It's a one-byte sequence. */ \
645 *outptr++ = (unsigned char) wc; \
646 else if (__glibc_likely (wc <= 0x7fffffff \
647 && (wc < 0xd800 || wc > 0xdfff))) \
650 unsigned char *start; \
652 for (step = 2; step < 6; ++step) \
653 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
656 if (__glibc_unlikely (outptr + step > outend)) \
659 result = __GCONV_FULL_OUTPUT; \
664 *outptr = (unsigned char) (~0xff >> step); \
668 start[--step] = 0x80 | (wc & 0x3f); \
676 STANDARD_TO_LOOP_ERR_HANDLER (4); \
681 #define LOOP_NEED_FLAGS
682 #include <iconv/loop.c>
683 #include <iconv/skeleton.c>
686 /* Convert from UTF-8 to the internal (UCS4-like) format. */
687 #define DEFINE_INIT 0
688 #define DEFINE_FINI 0
689 #define MIN_NEEDED_FROM 1
690 #define MAX_NEEDED_FROM 6
691 #define MIN_NEEDED_TO 4
692 #define FROM_DIRECTION 1
693 #define FROM_LOOP utf8_internal_loop
694 #define TO_LOOP utf8_internal_loop /* This is not used. */
695 #define FUNCTION_NAME __gconv_transform_utf8_internal
696 #define ONE_DIRECTION 1
698 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
699 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
700 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
701 #define LOOPFCT FROM_LOOP
704 /* Next input byte. */ \
705 uint32_t ch = *inptr; \
707 if (__glibc_likely (ch < 0x80)) \
709 /* One byte sequence. */ \
717 if (ch >= 0xc2 && ch < 0xe0) \
719 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
720 otherwise the wide character could have been represented \
721 using a single byte. */ \
725 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
727 /* We expect three bytes. */ \
731 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
733 /* We expect four bytes. */ \
737 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
739 /* We expect five bytes. */ \
743 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
745 /* We expect six bytes. */ \
751 /* Search the end of this ill-formed UTF-8 character. This \
752 is the next byte with (x & 0xc0) != 0x80. */ \
756 while (inptr + i < inend \
757 && (*(inptr + i) & 0xc0) == 0x80 \
761 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
764 if (__glibc_unlikely (inptr + cnt > inend)) \
766 /* We don't have enough input. But before we report that, check \
767 that all the remaining bytes are valid trail bytes (10xxxxxx). */ \
768 for (i = 1; inptr + i < inend; ++i) \
769 if ((inptr[i] & 0xc0) != 0x80) \
772 if (__glibc_likely (inptr + i == inend)) \
774 result = __GCONV_INCOMPLETE_INPUT; \
781 /* Read the possible remaining bytes. */ \
782 for (i = 1; i < cnt; ++i) \
784 uint32_t byte = inptr[i]; \
786 if ((byte & 0xc0) != 0x80) \
787 /* This is an illegal encoding. */ \
794 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
795 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
796 have been represented with fewer than cnt bytes. */ \
797 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
798 /* Do not accept UTF-16 surrogates. */ \
799 || (ch >= 0xd800 && ch <= 0xdfff)) \
801 /* This is an illegal encoding. */ \
808 /* Now adjust the pointers and store the result. */ \
809 *((uint32_t *) outptr) = ch; \
810 outptr += sizeof (uint32_t); \
812 #define LOOP_NEED_FLAGS
816 /* We store the remaining bytes while converting them into the UCS4 \
817 format. We can assume that the first byte in the buffer is \
818 correct and that it requires a larger number of bytes than there \
819 are in the input buffer. */ \
820 wint_t ch = **inptrp; \
823 state->__count = inend - *inptrp; \
825 assert (ch != 0xc0 && ch != 0xc1); \
826 if (ch >= 0xc2 && ch < 0xe0) \
828 /* We expect two bytes. The first byte cannot be 0xc0 or \
829 0xc1, otherwise the wide character could have been \
830 represented using a single byte. */ \
834 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
836 /* We expect three bytes. */ \
840 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
842 /* We expect four bytes. */ \
846 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
848 /* We expect five bytes. */ \
854 /* We expect six bytes. */ \
859 /* The first byte is already consumed. */ \
861 while (++(*inptrp) < inend) \
864 ch |= **inptrp & 0x3f; \
868 /* Shift for the so far missing bytes. */ \
871 /* Store the number of bytes expected for the entire sequence. */ \
872 state->__count |= cnt << 8; \
874 /* Store the value. */ \
875 state->__value.__wch = ch; \
878 #define UNPACK_BYTES \
880 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
881 wint_t wch = state->__value.__wch; \
882 size_t ntotal = state->__count >> 8; \
884 inlen = state->__count & 255; \
886 bytebuf[0] = inmask[ntotal - 2]; \
890 if (--ntotal < inlen) \
891 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
894 while (ntotal > 1); \
899 #define CLEAR_STATE \
903 #include <iconv/loop.c>
904 #include <iconv/skeleton.c>
907 /* Convert from UCS2 to the internal (UCS4-like) format. */
908 #define DEFINE_INIT 0
909 #define DEFINE_FINI 0
910 #define MIN_NEEDED_FROM 2
911 #define MIN_NEEDED_TO 4
912 #define FROM_DIRECTION 1
913 #define FROM_LOOP ucs2_internal_loop
914 #define TO_LOOP ucs2_internal_loop /* This is not used. */
915 #define FUNCTION_NAME __gconv_transform_ucs2_internal
916 #define ONE_DIRECTION 1
918 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
919 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
920 #define LOOPFCT FROM_LOOP
923 uint16_t u1 = get16 (inptr); \
925 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
927 /* Surrogate characters in UCS-2 input are not valid. Reject \
928 them. (Catching this here is not security relevant.) */ \
929 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
932 *((uint32_t *) outptr) = u1; \
933 outptr += sizeof (uint32_t); \
936 #define LOOP_NEED_FLAGS
937 #include <iconv/loop.c>
938 #include <iconv/skeleton.c>
941 /* Convert from the internal (UCS4-like) format to UCS2. */
942 #define DEFINE_INIT 0
943 #define DEFINE_FINI 0
944 #define MIN_NEEDED_FROM 4
945 #define MIN_NEEDED_TO 2
946 #define FROM_DIRECTION 1
947 #define FROM_LOOP internal_ucs2_loop
948 #define TO_LOOP internal_ucs2_loop /* This is not used. */
949 #define FUNCTION_NAME __gconv_transform_internal_ucs2
950 #define ONE_DIRECTION 1
952 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
953 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
954 #define LOOPFCT FROM_LOOP
957 uint32_t val = *((const uint32_t *) inptr); \
959 if (__glibc_unlikely (val >= 0x10000)) \
961 UNICODE_TAG_HANDLER (val, 4); \
962 STANDARD_TO_LOOP_ERR_HANDLER (4); \
964 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
966 /* Surrogate characters in UCS-4 input are not valid. \
967 We must catch this, because the UCS-2 output might be \
968 interpreted as UTF-16 by other programs. If we let \
969 surrogates pass through, attackers could make a security \
970 hole exploit by synthesizing any desired plane 1-16 \
972 result = __GCONV_ILLEGAL_INPUT; \
973 if (! ignore_errors_p ()) \
981 put16 (outptr, val); \
982 outptr += sizeof (uint16_t); \
986 #define LOOP_NEED_FLAGS
987 #include <iconv/loop.c>
988 #include <iconv/skeleton.c>
991 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
992 #define DEFINE_INIT 0
993 #define DEFINE_FINI 0
994 #define MIN_NEEDED_FROM 2
995 #define MIN_NEEDED_TO 4
996 #define FROM_DIRECTION 1
997 #define FROM_LOOP ucs2reverse_internal_loop
998 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
999 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1000 #define ONE_DIRECTION 1
1002 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1003 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1004 #define LOOPFCT FROM_LOOP
1007 uint16_t u1 = bswap_16 (get16 (inptr)); \
1009 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1011 /* Surrogate characters in UCS-2 input are not valid. Reject \
1012 them. (Catching this here is not security relevant.) */ \
1013 if (! ignore_errors_p ()) \
1015 result = __GCONV_ILLEGAL_INPUT; \
1023 *((uint32_t *) outptr) = u1; \
1024 outptr += sizeof (uint32_t); \
1027 #define LOOP_NEED_FLAGS
1028 #include <iconv/loop.c>
1029 #include <iconv/skeleton.c>
1032 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1033 #define DEFINE_INIT 0
1034 #define DEFINE_FINI 0
1035 #define MIN_NEEDED_FROM 4
1036 #define MIN_NEEDED_TO 2
1037 #define FROM_DIRECTION 1
1038 #define FROM_LOOP internal_ucs2reverse_loop
1039 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1040 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1041 #define ONE_DIRECTION 1
1043 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1044 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1045 #define LOOPFCT FROM_LOOP
1048 uint32_t val = *((const uint32_t *) inptr); \
1049 if (__glibc_unlikely (val >= 0x10000)) \
1051 UNICODE_TAG_HANDLER (val, 4); \
1052 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1054 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1056 /* Surrogate characters in UCS-4 input are not valid. \
1057 We must catch this, because the UCS-2 output might be \
1058 interpreted as UTF-16 by other programs. If we let \
1059 surrogates pass through, attackers could make a security \
1060 hole exploit by synthesizing any desired plane 1-16 \
1062 if (! ignore_errors_p ()) \
1064 result = __GCONV_ILLEGAL_INPUT; \
1073 put16 (outptr, bswap_16 (val)); \
1074 outptr += sizeof (uint16_t); \
1078 #define LOOP_NEED_FLAGS
1079 #include <iconv/loop.c>
1080 #include <iconv/skeleton.c>