1 /* Simple transformations functions.
2 Copyright (C) 1997-2021 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
29 #include <sys/param.h>
30 #include <gconv_int.h>
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 const unsigned char **, const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
42 # define EILSEQ EINVAL
46 /* Specialized conversion function for a single byte to INTERNAL, recognizing
47 only ASCII characters. */
49 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 standard
   says that unless some higher protocol specifies it differently, the
   byte order is big endian.  */
65 #define MIN_NEEDED_FROM 4
66 #define MIN_NEEDED_TO 4
67 #define FROM_DIRECTION 1
68 #define FROM_LOOP internal_ucs4_loop
69 #define TO_LOOP internal_ucs4_loop /* This is not used. */
70 #define FUNCTION_NAME __gconv_transform_internal_ucs4
71 #define ONE_DIRECTION 0
75 __attribute ((always_inline
))
76 internal_ucs4_loop (struct __gconv_step
*step
,
77 struct __gconv_step_data
*step_data
,
78 const unsigned char **inptrp
, const unsigned char *inend
,
79 unsigned char **outptrp
, const unsigned char *outend
,
82 const unsigned char *inptr
= *inptrp
;
83 unsigned char *outptr
= *outptrp
;
84 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
87 #if __BYTE_ORDER == __LITTLE_ENDIAN
88 /* Sigh, we have to do some real work. */
90 uint32_t *outptr32
= (uint32_t *) outptr
;
92 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
93 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
96 *outptrp
= (unsigned char *) outptr32
;
97 #elif __BYTE_ORDER == __BIG_ENDIAN
98 /* Simply copy the data. */
99 *inptrp
= inptr
+ n_convert
* 4;
100 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
102 # error "This endianess is not supported."
105 /* Determine the status. */
106 if (*inptrp
== inend
)
107 result
= __GCONV_EMPTY_INPUT
;
108 else if (*outptrp
+ 4 > outend
)
109 result
= __GCONV_FULL_OUTPUT
;
111 result
= __GCONV_INCOMPLETE_INPUT
;
116 #if !_STRING_ARCH_unaligned
118 __attribute ((always_inline
))
119 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
120 struct __gconv_step_data
*step_data
,
121 const unsigned char **inptrp
,
122 const unsigned char *inend
,
123 unsigned char **outptrp
,
124 const unsigned char *outend
,
125 size_t *irreversible
)
127 const unsigned char *inptr
= *inptrp
;
128 unsigned char *outptr
= *outptrp
;
129 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
132 # if __BYTE_ORDER == __LITTLE_ENDIAN
133 /* Sigh, we have to do some real work. */
136 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
138 outptr
[0] = inptr
[3];
139 outptr
[1] = inptr
[2];
140 outptr
[2] = inptr
[1];
141 outptr
[3] = inptr
[0];
146 # elif __BYTE_ORDER == __BIG_ENDIAN
147 /* Simply copy the data. */
148 *inptrp
= inptr
+ n_convert
* 4;
149 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
151 # error "This endianess is not supported."
154 /* Determine the status. */
155 if (*inptrp
== inend
)
156 result
= __GCONV_EMPTY_INPUT
;
157 else if (*outptrp
+ 4 > outend
)
158 result
= __GCONV_FULL_OUTPUT
;
160 result
= __GCONV_INCOMPLETE_INPUT
;
168 __attribute ((always_inline
))
169 internal_ucs4_loop_single (struct __gconv_step
*step
,
170 struct __gconv_step_data
*step_data
,
171 const unsigned char **inptrp
,
172 const unsigned char *inend
,
173 unsigned char **outptrp
,
174 const unsigned char *outend
,
175 size_t *irreversible
)
177 mbstate_t *state
= step_data
->__statep
;
178 size_t cnt
= state
->__count
& 7;
180 while (*inptrp
< inend
&& cnt
< 4)
181 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
183 if (__glibc_unlikely (cnt
< 4))
185 /* Still not enough bytes. Store the ones in the input buffer. */
186 state
->__count
&= ~7;
187 state
->__count
|= cnt
;
189 return __GCONV_INCOMPLETE_INPUT
;
192 #if __BYTE_ORDER == __LITTLE_ENDIAN
193 (*outptrp
)[0] = state
->__value
.__wchb
[3];
194 (*outptrp
)[1] = state
->__value
.__wchb
[2];
195 (*outptrp
)[2] = state
->__value
.__wchb
[1];
196 (*outptrp
)[3] = state
->__value
.__wchb
[0];
198 #elif __BYTE_ORDER == __BIG_ENDIAN
200 (*outptrp
)[0] = state
->__value
.__wchb
[0];
201 (*outptrp
)[1] = state
->__value
.__wchb
[1];
202 (*outptrp
)[2] = state
->__value
.__wchb
[2];
203 (*outptrp
)[3] = state
->__value
.__wchb
[3];
205 # error "This endianess is not supported."
209 /* Clear the state buffer. */
210 state
->__count
&= ~7;
215 #include <iconv/skeleton.c>
218 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
219 for the other direction we have to check for correct values here. */
220 #define DEFINE_INIT 0
221 #define DEFINE_FINI 0
222 #define MIN_NEEDED_FROM 4
223 #define MIN_NEEDED_TO 4
224 #define FROM_DIRECTION 1
225 #define FROM_LOOP ucs4_internal_loop
226 #define TO_LOOP ucs4_internal_loop /* This is not used. */
227 #define FUNCTION_NAME __gconv_transform_ucs4_internal
228 #define ONE_DIRECTION 0
232 __attribute ((always_inline
))
233 ucs4_internal_loop (struct __gconv_step
*step
,
234 struct __gconv_step_data
*step_data
,
235 const unsigned char **inptrp
, const unsigned char *inend
,
236 unsigned char **outptrp
, const unsigned char *outend
,
237 size_t *irreversible
)
239 int flags
= step_data
->__flags
;
240 const unsigned char *inptr
= *inptrp
;
241 unsigned char *outptr
= *outptrp
;
244 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
248 #if __BYTE_ORDER == __LITTLE_ENDIAN
249 inval
= bswap_32 (*(const uint32_t *) inptr
);
251 inval
= *(const uint32_t *) inptr
;
254 if (__glibc_unlikely (inval
> 0x7fffffff))
256 /* The value is too large. We don't try transliteration here since
257 this is not an error because of the lack of possibilities to
258 represent the result. This is a genuine bug in the input since
259 UCS4 does not allow such values. */
260 if (irreversible
== NULL
)
261 /* We are transliterating, don't try to correct anything. */
262 return __GCONV_ILLEGAL_INPUT
;
264 if (flags
& __GCONV_IGNORE_ERRORS
)
266 /* Just ignore this character. */
273 return __GCONV_ILLEGAL_INPUT
;
276 *((uint32_t *) outptr
) = inval
;
277 outptr
+= sizeof (uint32_t);
283 /* Determine the status. */
284 if (*inptrp
== inend
)
285 result
= __GCONV_EMPTY_INPUT
;
286 else if (*outptrp
+ 4 > outend
)
287 result
= __GCONV_FULL_OUTPUT
;
289 result
= __GCONV_INCOMPLETE_INPUT
;
294 #if !_STRING_ARCH_unaligned
296 __attribute ((always_inline
))
297 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
298 struct __gconv_step_data
*step_data
,
299 const unsigned char **inptrp
,
300 const unsigned char *inend
,
301 unsigned char **outptrp
,
302 const unsigned char *outend
,
303 size_t *irreversible
)
305 int flags
= step_data
->__flags
;
306 const unsigned char *inptr
= *inptrp
;
307 unsigned char *outptr
= *outptrp
;
310 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
312 if (__glibc_unlikely (inptr
[0] > 0x80))
314 /* The value is too large. We don't try transliteration here since
315 this is not an error because of the lack of possibilities to
316 represent the result. This is a genuine bug in the input since
317 UCS4 does not allow such values. */
318 if (irreversible
== NULL
)
319 /* We are transliterating, don't try to correct anything. */
320 return __GCONV_ILLEGAL_INPUT
;
322 if (flags
& __GCONV_IGNORE_ERRORS
)
324 /* Just ignore this character. */
331 return __GCONV_ILLEGAL_INPUT
;
334 # if __BYTE_ORDER == __LITTLE_ENDIAN
335 outptr
[3] = inptr
[0];
336 outptr
[2] = inptr
[1];
337 outptr
[1] = inptr
[2];
338 outptr
[0] = inptr
[3];
340 outptr
[0] = inptr
[0];
341 outptr
[1] = inptr
[1];
342 outptr
[2] = inptr
[2];
343 outptr
[3] = inptr
[3];
351 /* Determine the status. */
352 if (*inptrp
== inend
)
353 result
= __GCONV_EMPTY_INPUT
;
354 else if (*outptrp
+ 4 > outend
)
355 result
= __GCONV_FULL_OUTPUT
;
357 result
= __GCONV_INCOMPLETE_INPUT
;
365 __attribute ((always_inline
))
366 ucs4_internal_loop_single (struct __gconv_step
*step
,
367 struct __gconv_step_data
*step_data
,
368 const unsigned char **inptrp
,
369 const unsigned char *inend
,
370 unsigned char **outptrp
,
371 const unsigned char *outend
,
372 size_t *irreversible
)
374 mbstate_t *state
= step_data
->__statep
;
375 int flags
= step_data
->__flags
;
376 size_t cnt
= state
->__count
& 7;
378 while (*inptrp
< inend
&& cnt
< 4)
379 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
381 if (__glibc_unlikely (cnt
< 4))
383 /* Still not enough bytes. Store the ones in the input buffer. */
384 state
->__count
&= ~7;
385 state
->__count
|= cnt
;
387 return __GCONV_INCOMPLETE_INPUT
;
390 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x80,
393 /* The value is too large. We don't try transliteration here since
394 this is not an error because of the lack of possibilities to
395 represent the result. This is a genuine bug in the input since
396 UCS4 does not allow such values. */
397 if (!(flags
& __GCONV_IGNORE_ERRORS
))
399 *inptrp
-= cnt
- (state
->__count
& 7);
400 return __GCONV_ILLEGAL_INPUT
;
405 #if __BYTE_ORDER == __LITTLE_ENDIAN
406 (*outptrp
)[0] = state
->__value
.__wchb
[3];
407 (*outptrp
)[1] = state
->__value
.__wchb
[2];
408 (*outptrp
)[2] = state
->__value
.__wchb
[1];
409 (*outptrp
)[3] = state
->__value
.__wchb
[0];
410 #elif __BYTE_ORDER == __BIG_ENDIAN
411 (*outptrp
)[0] = state
->__value
.__wchb
[0];
412 (*outptrp
)[1] = state
->__value
.__wchb
[1];
413 (*outptrp
)[2] = state
->__value
.__wchb
[2];
414 (*outptrp
)[3] = state
->__value
.__wchb
[3];
420 /* Clear the state buffer. */
421 state
->__count
&= ~7;
426 #include <iconv/skeleton.c>
429 /* Similarly for the little endian form. */
430 #define DEFINE_INIT 0
431 #define DEFINE_FINI 0
432 #define MIN_NEEDED_FROM 4
433 #define MIN_NEEDED_TO 4
434 #define FROM_DIRECTION 1
435 #define FROM_LOOP internal_ucs4le_loop
436 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
437 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
438 #define ONE_DIRECTION 0
442 __attribute ((always_inline
))
443 internal_ucs4le_loop (struct __gconv_step
*step
,
444 struct __gconv_step_data
*step_data
,
445 const unsigned char **inptrp
, const unsigned char *inend
,
446 unsigned char **outptrp
, const unsigned char *outend
,
447 size_t *irreversible
)
449 const unsigned char *inptr
= *inptrp
;
450 unsigned char *outptr
= *outptrp
;
451 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
454 #if __BYTE_ORDER == __BIG_ENDIAN
455 /* Sigh, we have to do some real work. */
457 uint32_t *outptr32
= (uint32_t *) outptr
;
459 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
460 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
461 outptr
= (unsigned char *) outptr32
;
465 #elif __BYTE_ORDER == __LITTLE_ENDIAN
466 /* Simply copy the data. */
467 *inptrp
= inptr
+ n_convert
* 4;
468 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
470 # error "This endianess is not supported."
473 /* Determine the status. */
474 if (*inptrp
== inend
)
475 result
= __GCONV_EMPTY_INPUT
;
476 else if (*outptrp
+ 4 > outend
)
477 result
= __GCONV_FULL_OUTPUT
;
479 result
= __GCONV_INCOMPLETE_INPUT
;
484 #if !_STRING_ARCH_unaligned
486 __attribute ((always_inline
))
487 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
488 struct __gconv_step_data
*step_data
,
489 const unsigned char **inptrp
,
490 const unsigned char *inend
,
491 unsigned char **outptrp
,
492 const unsigned char *outend
,
493 size_t *irreversible
)
495 const unsigned char *inptr
= *inptrp
;
496 unsigned char *outptr
= *outptrp
;
497 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
500 # if __BYTE_ORDER == __BIG_ENDIAN
501 /* Sigh, we have to do some real work. */
504 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
506 outptr
[0] = inptr
[3];
507 outptr
[1] = inptr
[2];
508 outptr
[2] = inptr
[1];
509 outptr
[3] = inptr
[0];
514 # elif __BYTE_ORDER == __LITTLE_ENDIAN
515 /* Simply copy the data. */
516 *inptrp
= inptr
+ n_convert
* 4;
517 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
519 # error "This endianess is not supported."
522 /* Determine the status. */
523 if (*inptrp
== inend
)
524 result
= __GCONV_EMPTY_INPUT
;
525 else if (*inptrp
+ 4 > inend
)
526 result
= __GCONV_INCOMPLETE_INPUT
;
529 assert (*outptrp
+ 4 > outend
);
530 result
= __GCONV_FULL_OUTPUT
;
539 __attribute ((always_inline
))
540 internal_ucs4le_loop_single (struct __gconv_step
*step
,
541 struct __gconv_step_data
*step_data
,
542 const unsigned char **inptrp
,
543 const unsigned char *inend
,
544 unsigned char **outptrp
,
545 const unsigned char *outend
,
546 size_t *irreversible
)
548 mbstate_t *state
= step_data
->__statep
;
549 size_t cnt
= state
->__count
& 7;
551 while (*inptrp
< inend
&& cnt
< 4)
552 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
554 if (__glibc_unlikely (cnt
< 4))
556 /* Still not enough bytes. Store the ones in the input buffer. */
557 state
->__count
&= ~7;
558 state
->__count
|= cnt
;
560 return __GCONV_INCOMPLETE_INPUT
;
563 #if __BYTE_ORDER == __BIG_ENDIAN
564 (*outptrp
)[0] = state
->__value
.__wchb
[3];
565 (*outptrp
)[1] = state
->__value
.__wchb
[2];
566 (*outptrp
)[2] = state
->__value
.__wchb
[1];
567 (*outptrp
)[3] = state
->__value
.__wchb
[0];
571 (*outptrp
)[0] = state
->__value
.__wchb
[0];
572 (*outptrp
)[1] = state
->__value
.__wchb
[1];
573 (*outptrp
)[2] = state
->__value
.__wchb
[2];
574 (*outptrp
)[3] = state
->__value
.__wchb
[3];
580 /* Clear the state buffer. */
581 state
->__count
&= ~7;
586 #include <iconv/skeleton.c>
589 /* And finally from UCS4-LE to the internal encoding. */
590 #define DEFINE_INIT 0
591 #define DEFINE_FINI 0
592 #define MIN_NEEDED_FROM 4
593 #define MIN_NEEDED_TO 4
594 #define FROM_DIRECTION 1
595 #define FROM_LOOP ucs4le_internal_loop
596 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
597 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
598 #define ONE_DIRECTION 0
602 __attribute ((always_inline
))
603 ucs4le_internal_loop (struct __gconv_step
*step
,
604 struct __gconv_step_data
*step_data
,
605 const unsigned char **inptrp
, const unsigned char *inend
,
606 unsigned char **outptrp
, const unsigned char *outend
,
607 size_t *irreversible
)
609 int flags
= step_data
->__flags
;
610 const unsigned char *inptr
= *inptrp
;
611 unsigned char *outptr
= *outptrp
;
614 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
618 #if __BYTE_ORDER == __BIG_ENDIAN
619 inval
= bswap_32 (*(const uint32_t *) inptr
);
621 inval
= *(const uint32_t *) inptr
;
624 if (__glibc_unlikely (inval
> 0x7fffffff))
626 /* The value is too large. We don't try transliteration here since
627 this is not an error because of the lack of possibilities to
628 represent the result. This is a genuine bug in the input since
629 UCS4 does not allow such values. */
630 if (irreversible
== NULL
)
631 /* We are transliterating, don't try to correct anything. */
632 return __GCONV_ILLEGAL_INPUT
;
634 if (flags
& __GCONV_IGNORE_ERRORS
)
636 /* Just ignore this character. */
643 return __GCONV_ILLEGAL_INPUT
;
646 *((uint32_t *) outptr
) = inval
;
647 outptr
+= sizeof (uint32_t);
653 /* Determine the status. */
654 if (*inptrp
== inend
)
655 result
= __GCONV_EMPTY_INPUT
;
656 else if (*inptrp
+ 4 > inend
)
657 result
= __GCONV_INCOMPLETE_INPUT
;
660 assert (*outptrp
+ 4 > outend
);
661 result
= __GCONV_FULL_OUTPUT
;
667 #if !_STRING_ARCH_unaligned
669 __attribute ((always_inline
))
670 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
671 struct __gconv_step_data
*step_data
,
672 const unsigned char **inptrp
,
673 const unsigned char *inend
,
674 unsigned char **outptrp
,
675 const unsigned char *outend
,
676 size_t *irreversible
)
678 int flags
= step_data
->__flags
;
679 const unsigned char *inptr
= *inptrp
;
680 unsigned char *outptr
= *outptrp
;
683 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
685 if (__glibc_unlikely (inptr
[3] > 0x80))
687 /* The value is too large. We don't try transliteration here since
688 this is not an error because of the lack of possibilities to
689 represent the result. This is a genuine bug in the input since
690 UCS4 does not allow such values. */
691 if (irreversible
== NULL
)
692 /* We are transliterating, don't try to correct anything. */
693 return __GCONV_ILLEGAL_INPUT
;
695 if (flags
& __GCONV_IGNORE_ERRORS
)
697 /* Just ignore this character. */
704 return __GCONV_ILLEGAL_INPUT
;
707 # if __BYTE_ORDER == __BIG_ENDIAN
708 outptr
[3] = inptr
[0];
709 outptr
[2] = inptr
[1];
710 outptr
[1] = inptr
[2];
711 outptr
[0] = inptr
[3];
713 outptr
[0] = inptr
[0];
714 outptr
[1] = inptr
[1];
715 outptr
[2] = inptr
[2];
716 outptr
[3] = inptr
[3];
725 /* Determine the status. */
726 if (*inptrp
== inend
)
727 result
= __GCONV_EMPTY_INPUT
;
728 else if (*inptrp
+ 4 > inend
)
729 result
= __GCONV_INCOMPLETE_INPUT
;
732 assert (*outptrp
+ 4 > outend
);
733 result
= __GCONV_FULL_OUTPUT
;
742 __attribute ((always_inline
))
743 ucs4le_internal_loop_single (struct __gconv_step
*step
,
744 struct __gconv_step_data
*step_data
,
745 const unsigned char **inptrp
,
746 const unsigned char *inend
,
747 unsigned char **outptrp
,
748 const unsigned char *outend
,
749 size_t *irreversible
)
751 mbstate_t *state
= step_data
->__statep
;
752 int flags
= step_data
->__flags
;
753 size_t cnt
= state
->__count
& 7;
755 while (*inptrp
< inend
&& cnt
< 4)
756 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
758 if (__glibc_unlikely (cnt
< 4))
760 /* Still not enough bytes. Store the ones in the input buffer. */
761 state
->__count
&= ~7;
762 state
->__count
|= cnt
;
764 return __GCONV_INCOMPLETE_INPUT
;
767 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x80,
770 /* The value is too large. We don't try transliteration here since
771 this is not an error because of the lack of possibilities to
772 represent the result. This is a genuine bug in the input since
773 UCS4 does not allow such values. */
774 if (!(flags
& __GCONV_IGNORE_ERRORS
))
775 return __GCONV_ILLEGAL_INPUT
;
779 #if __BYTE_ORDER == __BIG_ENDIAN
780 (*outptrp
)[0] = state
->__value
.__wchb
[3];
781 (*outptrp
)[1] = state
->__value
.__wchb
[2];
782 (*outptrp
)[2] = state
->__value
.__wchb
[1];
783 (*outptrp
)[3] = state
->__value
.__wchb
[0];
785 (*outptrp
)[0] = state
->__value
.__wchb
[0];
786 (*outptrp
)[1] = state
->__value
.__wchb
[1];
787 (*outptrp
)[2] = state
->__value
.__wchb
[2];
788 (*outptrp
)[3] = state
->__value
.__wchb
[3];
794 /* Clear the state buffer. */
795 state
->__count
&= ~7;
800 #include <iconv/skeleton.c>
803 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
804 #define DEFINE_INIT 0
805 #define DEFINE_FINI 0
806 #define MIN_NEEDED_FROM 1
807 #define MIN_NEEDED_TO 4
808 #define FROM_DIRECTION 1
809 #define FROM_LOOP ascii_internal_loop
810 #define TO_LOOP ascii_internal_loop /* This is not used. */
811 #define FUNCTION_NAME __gconv_transform_ascii_internal
812 #define ONE_DIRECTION 1
814 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
815 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
816 #define LOOPFCT FROM_LOOP
819 if (__glibc_unlikely (*inptr > '\x7f')) \
821 /* The value is too large. We don't try transliteration here since \
822 this is not an error because of the lack of possibilities to \
823 represent the result. This is a genuine bug in the input since \
824 ASCII does not allow such values. */ \
825 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
829 /* It's an one byte sequence. */ \
830 *((uint32_t *) outptr) = *inptr++; \
831 outptr += sizeof (uint32_t); \
834 #define LOOP_NEED_FLAGS
835 #include <iconv/loop.c>
836 #include <iconv/skeleton.c>
839 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
840 #define DEFINE_INIT 0
841 #define DEFINE_FINI 0
842 #define MIN_NEEDED_FROM 4
843 #define MIN_NEEDED_TO 1
844 #define FROM_DIRECTION 1
845 #define FROM_LOOP internal_ascii_loop
846 #define TO_LOOP internal_ascii_loop /* This is not used. */
847 #define FUNCTION_NAME __gconv_transform_internal_ascii
848 #define ONE_DIRECTION 1
850 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
851 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
852 #define LOOPFCT FROM_LOOP
855 if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
857 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
858 STANDARD_TO_LOOP_ERR_HANDLER (4); \
862 /* It's an one byte sequence. */ \
863 *outptr++ = *((const uint32_t *) inptr); \
864 inptr += sizeof (uint32_t); \
867 #define LOOP_NEED_FLAGS
868 #include <iconv/loop.c>
869 #include <iconv/skeleton.c>
872 /* Convert from the internal (UCS4-like) format to UTF-8. */
873 #define DEFINE_INIT 0
874 #define DEFINE_FINI 0
875 #define MIN_NEEDED_FROM 4
876 #define MIN_NEEDED_TO 1
877 #define MAX_NEEDED_TO 6
878 #define FROM_DIRECTION 1
879 #define FROM_LOOP internal_utf8_loop
880 #define TO_LOOP internal_utf8_loop /* This is not used. */
881 #define FUNCTION_NAME __gconv_transform_internal_utf8
882 #define ONE_DIRECTION 1
884 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
885 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
886 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
887 #define LOOPFCT FROM_LOOP
890 uint32_t wc = *((const uint32_t *) inptr); \
892 if (__glibc_likely (wc < 0x80)) \
893 /* It's an one byte sequence. */ \
894 *outptr++ = (unsigned char) wc; \
895 else if (__glibc_likely (wc <= 0x7fffffff \
896 && (wc < 0xd800 || wc > 0xdfff))) \
899 unsigned char *start; \
901 for (step = 2; step < 6; ++step) \
902 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
905 if (__glibc_unlikely (outptr + step > outend)) \
908 result = __GCONV_FULL_OUTPUT; \
913 *outptr = (unsigned char) (~0xff >> step); \
917 start[--step] = 0x80 | (wc & 0x3f); \
925 STANDARD_TO_LOOP_ERR_HANDLER (4); \
930 #define LOOP_NEED_FLAGS
931 #include <iconv/loop.c>
932 #include <iconv/skeleton.c>
935 /* Convert from UTF-8 to the internal (UCS4-like) format. */
936 #define DEFINE_INIT 0
937 #define DEFINE_FINI 0
938 #define MIN_NEEDED_FROM 1
939 #define MAX_NEEDED_FROM 6
940 #define MIN_NEEDED_TO 4
941 #define FROM_DIRECTION 1
942 #define FROM_LOOP utf8_internal_loop
943 #define TO_LOOP utf8_internal_loop /* This is not used. */
944 #define FUNCTION_NAME __gconv_transform_utf8_internal
945 #define ONE_DIRECTION 1
947 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
948 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
949 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
950 #define LOOPFCT FROM_LOOP
953 /* Next input byte. */ \
954 uint32_t ch = *inptr; \
956 if (__glibc_likely (ch < 0x80)) \
958 /* One byte sequence. */ \
966 if (ch >= 0xc2 && ch < 0xe0) \
968 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
969 otherwise the wide character could have been represented \
970 using a single byte. */ \
974 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
976 /* We expect three bytes. */ \
980 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
982 /* We expect four bytes. */ \
986 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
988 /* We expect five bytes. */ \
992 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
994 /* We expect six bytes. */ \
1000 /* Search the end of this ill-formed UTF-8 character. This \
1001 is the next byte with (x & 0xc0) != 0x80. */ \
1005 while (inptr + i < inend \
1006 && (*(inptr + i) & 0xc0) == 0x80 \
1010 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1013 if (__glibc_unlikely (inptr + cnt > inend)) \
1015 /* We don't have enough input. But before we report that check \
1016 that all the bytes are correct. */ \
1017 for (i = 1; inptr + i < inend; ++i) \
1018 if ((inptr[i] & 0xc0) != 0x80) \
1021 if (__glibc_likely (inptr + i == inend)) \
1023 result = __GCONV_INCOMPLETE_INPUT; \
1030 /* Read the possible remaining bytes. */ \
1031 for (i = 1; i < cnt; ++i) \
1033 uint32_t byte = inptr[i]; \
1035 if ((byte & 0xc0) != 0x80) \
1036 /* This is an illegal encoding. */ \
1040 ch |= byte & 0x3f; \
1043 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1044 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1045 have been represented with fewer than cnt bytes. */ \
1046 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1047 /* Do not accept UTF-16 surrogates. */ \
1048 || (ch >= 0xd800 && ch <= 0xdfff)) \
1050 /* This is an illegal encoding. */ \
1057 /* Now adjust the pointers and store the result. */ \
1058 *((uint32_t *) outptr) = ch; \
1059 outptr += sizeof (uint32_t); \
1061 #define LOOP_NEED_FLAGS
1063 #define STORE_REST \
1065 /* We store the remaining bytes while converting them into the UCS4 \
1066 format. We can assume that the first byte in the buffer is \
1067 correct and that it requires a larger number of bytes than there \
1068 are in the input buffer. */ \
1069 wint_t ch = **inptrp; \
1072 state->__count = inend - *inptrp; \
1074 assert (ch != 0xc0 && ch != 0xc1); \
1075 if (ch >= 0xc2 && ch < 0xe0) \
1077 /* We expect two bytes. The first byte cannot be 0xc0 or \
1078 0xc1, otherwise the wide character could have been \
1079 represented using a single byte. */ \
1083 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
1085 /* We expect three bytes. */ \
1089 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
1091 /* We expect four bytes. */ \
1095 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
1097 /* We expect five bytes. */ \
1103 /* We expect six bytes. */ \
1108 /* The first byte is already consumed. */ \
1110 while (++(*inptrp) < inend) \
1113 ch |= **inptrp & 0x3f; \
1117 /* Shift for the so far missing bytes. */ \
1120 /* Store the number of bytes expected for the entire sequence. */ \
1121 state->__count |= cnt << 8; \
1123 /* Store the value. */ \
1124 state->__value.__wch = ch; \
1127 #define UNPACK_BYTES \
1129 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1130 wint_t wch = state->__value.__wch; \
1131 size_t ntotal = state->__count >> 8; \
1133 inlen = state->__count & 255; \
1135 bytebuf[0] = inmask[ntotal - 2]; \
1139 if (--ntotal < inlen) \
1140 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1143 while (ntotal > 1); \
1145 bytebuf[0] |= wch; \
1148 #define CLEAR_STATE \
1152 #include <iconv/loop.c>
1153 #include <iconv/skeleton.c>
1156 /* Convert from UCS2 to the internal (UCS4-like) format. */
1157 #define DEFINE_INIT 0
1158 #define DEFINE_FINI 0
1159 #define MIN_NEEDED_FROM 2
1160 #define MIN_NEEDED_TO 4
1161 #define FROM_DIRECTION 1
1162 #define FROM_LOOP ucs2_internal_loop
1163 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1164 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1165 #define ONE_DIRECTION 1
1167 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1168 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1169 #define LOOPFCT FROM_LOOP
1172 uint16_t u1 = get16 (inptr); \
1174 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1176 /* Surrogate characters in UCS-2 input are not valid. Reject \
1177 them. (Catching this here is not security relevant.) */ \
1178 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1181 *((uint32_t *) outptr) = u1; \
1182 outptr += sizeof (uint32_t); \
1185 #define LOOP_NEED_FLAGS
1186 #include <iconv/loop.c>
1187 #include <iconv/skeleton.c>
1190 /* Convert from the internal (UCS4-like) format to UCS2. */
1191 #define DEFINE_INIT 0
1192 #define DEFINE_FINI 0
1193 #define MIN_NEEDED_FROM 4
1194 #define MIN_NEEDED_TO 2
1195 #define FROM_DIRECTION 1
1196 #define FROM_LOOP internal_ucs2_loop
1197 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1198 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1199 #define ONE_DIRECTION 1
1201 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1202 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1203 #define LOOPFCT FROM_LOOP
1206 uint32_t val = *((const uint32_t *) inptr); \
1208 if (__glibc_unlikely (val >= 0x10000)) \
1210 UNICODE_TAG_HANDLER (val, 4); \
1211 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1213 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1215 /* Surrogate characters in UCS-4 input are not valid. \
1216 We must catch this, because the UCS-2 output might be \
1217 interpreted as UTF-16 by other programs. If we let \
1218 surrogates pass through, attackers could make a security \
1219 hole exploit by synthesizing any desired plane 1-16 \
1221 result = __GCONV_ILLEGAL_INPUT; \
1222 if (! ignore_errors_p ()) \
1230 put16 (outptr, val); \
1231 outptr += sizeof (uint16_t); \
1235 #define LOOP_NEED_FLAGS
1236 #include <iconv/loop.c>
1237 #include <iconv/skeleton.c>
1240 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1241 #define DEFINE_INIT 0
1242 #define DEFINE_FINI 0
1243 #define MIN_NEEDED_FROM 2
1244 #define MIN_NEEDED_TO 4
1245 #define FROM_DIRECTION 1
1246 #define FROM_LOOP ucs2reverse_internal_loop
1247 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1248 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1249 #define ONE_DIRECTION 1
1251 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1252 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1253 #define LOOPFCT FROM_LOOP
1256 uint16_t u1 = bswap_16 (get16 (inptr)); \
1258 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1260 /* Surrogate characters in UCS-2 input are not valid. Reject \
1261 them. (Catching this here is not security relevant.) */ \
1262 if (! ignore_errors_p ()) \
1264 result = __GCONV_ILLEGAL_INPUT; \
1272 *((uint32_t *) outptr) = u1; \
1273 outptr += sizeof (uint32_t); \
1276 #define LOOP_NEED_FLAGS
1277 #include <iconv/loop.c>
1278 #include <iconv/skeleton.c>
1281 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1282 #define DEFINE_INIT 0
1283 #define DEFINE_FINI 0
1284 #define MIN_NEEDED_FROM 4
1285 #define MIN_NEEDED_TO 2
1286 #define FROM_DIRECTION 1
1287 #define FROM_LOOP internal_ucs2reverse_loop
1288 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1289 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1290 #define ONE_DIRECTION 1
1292 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1293 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1294 #define LOOPFCT FROM_LOOP
1297 uint32_t val = *((const uint32_t *) inptr); \
1298 if (__glibc_unlikely (val >= 0x10000)) \
1300 UNICODE_TAG_HANDLER (val, 4); \
1301 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1303 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1305 /* Surrogate characters in UCS-4 input are not valid. \
1306 We must catch this, because the UCS-2 output might be \
1307 interpreted as UTF-16 by other programs. If we let \
1308 surrogates pass through, attackers could make a security \
1309 hole exploit by synthesizing any desired plane 1-16 \
1311 if (! ignore_errors_p ()) \
1313 result = __GCONV_ILLEGAL_INPUT; \
1322 put16 (outptr, bswap_16 (val)); \
1323 outptr += sizeof (uint16_t); \
1327 #define LOOP_NEED_FLAGS
1328 #include <iconv/loop.c>
1329 #include <iconv/skeleton.c>