1 /* Simple transformations functions.
2 Copyright (C) 1997-2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
29 #include <sys/param.h>
30 #include <gconv_int.h>
/* Declare the builtin conversion functions.  BUILTIN_ALIAS expands to
   nothing and BUILTIN_TRANSFORMATION expands to an extern prototype for
   its Fct argument, so the include below yields one prototype per
   BUILTIN_TRANSFORMATION use (assuming "gconv_builtin.h" consists of
   invocations of these two macros -- TODO confirm).  */
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 const unsigned char **, const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
42 # define EILSEQ EINVAL
46 /* Specialized conversion function for a single byte to INTERNAL, recognizing
47 only ASCII characters. */
49 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
58 /* Transform from the internal, UCS4-like format, to UCS4. The
59 difference between the internal ucs4 format and the real UCS4
60 format is, if any, the endianness. The Unicode/ISO 10646 says that
61 unless some higher protocol specifies it differently, the byte
62 order is big endian. */
/* Parameter macros for this conversion: 4 bytes per character on both
   sides, and internal_ucs4_loop named for both loop slots.
   NOTE(review): these are presumably consumed by the
   `#include <iconv/skeleton.c>' further below; skeleton.c is not visible
   here, so confirm the exact meaning of each macro there.  */
65 #define MIN_NEEDED_FROM 4
66 #define MIN_NEEDED_TO 4
67 #define FROM_DIRECTION 1
68 #define FROM_LOOP internal_ucs4_loop
69 #define TO_LOOP internal_ucs4_loop /* This is not used. */
70 #define FUNCTION_NAME __gconv_transform_internal_ucs4
71 #define ONE_DIRECTION 0
/* Convert whole 4-byte units from *INPTRP to *OUTPTRP, limited by the
   smaller of the two buffers.  Little-endian hosts byte-swap each 32-bit
   word with bswap_32; big-endian hosts copy the bytes with __mempcpy.
   RESULT then records why the conversion stopped.
   NOTE(review): this listing lacks several original lines (return type,
   braces, some declarations, `else'/`#else'/`#endif'); the comments
   describe only what is visible.  */
75 __attribute ((always_inline
))
76 internal_ucs4_loop (struct __gconv_step
*step
,
77 struct __gconv_step_data
*step_data
,
78 const unsigned char **inptrp
, const unsigned char *inend
,
79 unsigned char **outptrp
, const unsigned char *outend
,
82 const unsigned char *inptr
= *inptrp
;
83 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
84 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
87 #if __BYTE_ORDER == __LITTLE_ENDIAN
88 /* Sigh, we have to do some real work. */
90 uint32_t *outptr32
= (uint32_t *) outptr
;
92 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
93 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
96 *outptrp
= (unsigned char *) outptr32
;
97 #elif __BYTE_ORDER == __BIG_ENDIAN
98 /* Simply copy the data. */
99 *inptrp
= inptr
+ n_convert
* 4;
100 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
102 # error "This endianess is not supported."
/* Input exhausted -> __GCONV_EMPTY_INPUT; fewer than 4 output bytes
   left -> __GCONV_FULL_OUTPUT; the remaining assignment is presumably the
   `else' case (that line is missing from this listing).  */
105 /* Determine the status. */
106 if (*inptrp
== inend
)
107 result
= __GCONV_EMPTY_INPUT
;
108 else if (*outptrp
+ 4 > outend
)
109 result
= __GCONV_FULL_OUTPUT
;
111 result
= __GCONV_INCOMPLETE_INPUT
;
/* Byte-wise counterpart of internal_ucs4_loop, compiled only when
   _STRING_ARCH_unaligned is false.  It never dereferences the buffers as
   uint32_t: on little-endian hosts the four bytes of each unit are
   written in reverse order, on big-endian hosts they are copied with
   __mempcpy.  The status computation matches internal_ucs4_loop.
   NOTE(review): several original lines are missing from this listing.  */
116 #if !_STRING_ARCH_unaligned
118 __attribute ((always_inline
))
119 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
120 struct __gconv_step_data
*step_data
,
121 const unsigned char **inptrp
,
122 const unsigned char *inend
,
123 unsigned char **outptrp
,
124 const unsigned char *outend
,
125 size_t *irreversible
)
127 const unsigned char *inptr
= *inptrp
;
128 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
129 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
132 # if __BYTE_ORDER == __LITTLE_ENDIAN
133 /* Sigh, we have to do some real work. */
136 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
/* Reverse the byte order of one unit.  */
138 outptr
[0] = inptr
[3];
139 outptr
[1] = inptr
[2];
140 outptr
[2] = inptr
[1];
141 outptr
[3] = inptr
[0];
146 # elif __BYTE_ORDER == __BIG_ENDIAN
147 /* Simply copy the data. */
148 *inptrp
= inptr
+ n_convert
* 4;
149 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
151 # error "This endianess is not supported."
154 /* Determine the status. */
155 if (*inptrp
== inend
)
156 result
= __GCONV_EMPTY_INPUT
;
157 else if (*outptrp
+ 4 > outend
)
158 result
= __GCONV_FULL_OUTPUT
;
160 result
= __GCONV_INCOMPLETE_INPUT
;
/* Finish one character whose bytes are split across calls.  The low
   three bits of STATE->__count hold how many bytes are already buffered
   in STATE->__value.__wchb.  Further bytes are taken from *INPTRP until
   four are present; if the input ends first, the new count is saved and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the four bytes are
   written to *OUTPTRP (reversed on little-endian hosts) and the count
   bits are cleared.
   NOTE(review): several original lines are missing from this listing.  */
168 __attribute ((always_inline
))
169 internal_ucs4_loop_single (struct __gconv_step
*step
,
170 struct __gconv_step_data
*step_data
,
171 const unsigned char **inptrp
,
172 const unsigned char *inend
,
173 unsigned char **outptrp
,
174 const unsigned char *outend
,
175 size_t *irreversible
)
177 mbstate_t *state
= step_data
->__statep
;
/* Bytes already collected by earlier calls.  */
178 size_t cnt
= state
->__count
& 7;
/* Top the buffer up to four bytes from the new input.  */
180 while (*inptrp
< inend
&& cnt
< 4)
181 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
183 if (__glibc_unlikely (cnt
< 4))
185 /* Still not enough bytes. Store the ones in the input buffer. */
186 state
->__count
&= ~7;
187 state
->__count
|= cnt
;
189 return __GCONV_INCOMPLETE_INPUT
;
192 #if __BYTE_ORDER == __LITTLE_ENDIAN
193 (*outptrp
)[0] = state
->__value
.__wchb
[3];
194 (*outptrp
)[1] = state
->__value
.__wchb
[2];
195 (*outptrp
)[2] = state
->__value
.__wchb
[1];
196 (*outptrp
)[3] = state
->__value
.__wchb
[0];
198 #elif __BYTE_ORDER == __BIG_ENDIAN
200 (*outptrp
)[0] = state
->__value
.__wchb
[0];
201 (*outptrp
)[1] = state
->__value
.__wchb
[1];
202 (*outptrp
)[2] = state
->__value
.__wchb
[2];
203 (*outptrp
)[3] = state
->__value
.__wchb
[3];
205 # error "This endianess is not supported."
209 /* Clear the state buffer. */
210 state
->__count
&= ~7;
215 #include <iconv/skeleton.c>
218 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
219 for the other direction we have to check for correct values here. */
/* Parameter macros for this conversion: 4 bytes per character on both
   sides, and ucs4_internal_loop named for both loop slots.
   NOTE(review): presumably consumed by the `#include <iconv/skeleton.c>'
   further below; confirm the macro meanings in skeleton.c.  */
220 #define DEFINE_INIT 0
221 #define DEFINE_FINI 0
222 #define MIN_NEEDED_FROM 4
223 #define MIN_NEEDED_TO 4
224 #define FROM_DIRECTION 1
225 #define FROM_LOOP ucs4_internal_loop
226 #define TO_LOOP ucs4_internal_loop /* This is not used. */
227 #define FUNCTION_NAME __gconv_transform_ucs4_internal
228 #define ONE_DIRECTION 0
/* Convert whole 4-byte UCS4 units from *INPTRP into the internal format
   at *OUTPTRP.  Each unit is read as a uint32_t (byte-swapped on
   little-endian hosts) and rejected when it exceeds 0x7fffffff: a null
   IRREVERSIBLE returns __GCONV_ILLEGAL_INPUT at once, otherwise FLAGS is
   tested for __GCONV_IGNORE_ERRORS.  Accepted values are stored as
   uint32_t.
   NOTE(review): several original lines are missing from this listing,
   including the body of the ignore-errors branch, so how an ignored
   character is skipped is not visible here.  */
232 __attribute ((always_inline
))
233 ucs4_internal_loop (struct __gconv_step
*step
,
234 struct __gconv_step_data
*step_data
,
235 const unsigned char **inptrp
, const unsigned char *inend
,
236 unsigned char **outptrp
, const unsigned char *outend
,
237 size_t *irreversible
)
239 int flags
= step_data
->__flags
;
240 const unsigned char *inptr
= *inptrp
;
241 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
242 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
246 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
250 #if __BYTE_ORDER == __LITTLE_ENDIAN
251 inval
= bswap_32 (*(const uint32_t *) inptr
);
253 inval
= *(const uint32_t *) inptr
;
256 if (__glibc_unlikely (inval
> 0x7fffffff))
258 /* The value is too large. We don't try transliteration here since
259 this is not an error because of the lack of possibilities to
260 represent the result. This is a genuine bug in the input since
261 UCS4 does not allow such values. */
262 if (irreversible
== NULL
)
263 /* We are transliterating, don't try to correct anything. */
264 return __GCONV_ILLEGAL_INPUT
;
266 if (flags
& __GCONV_IGNORE_ERRORS
)
268 /* Just ignore this character. */
275 return __GCONV_ILLEGAL_INPUT
;
278 *((uint32_t *) outptr
) = inval
;
279 outptr
+= sizeof (uint32_t);
285 /* Determine the status. */
286 if (*inptrp
== inend
)
287 result
= __GCONV_EMPTY_INPUT
;
288 else if (*outptrp
+ 4 > outend
)
289 result
= __GCONV_FULL_OUTPUT
;
291 result
= __GCONV_INCOMPLETE_INPUT
;
/* Byte-wise counterpart of ucs4_internal_loop, compiled only when
   _STRING_ARCH_unaligned is false.  Each 4-byte UCS4 unit is range
   checked through its first byte and then copied byte by byte into the
   internal format (reversed on little-endian hosts).  Error handling
   follows ucs4_internal_loop: a null IRREVERSIBLE returns
   __GCONV_ILLEGAL_INPUT at once, otherwise FLAGS is tested for
   __GCONV_IGNORE_ERRORS.
   NOTE(review): several original lines are missing from this listing.  */
296 #if !_STRING_ARCH_unaligned
298 __attribute ((always_inline
))
299 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
300 struct __gconv_step_data
*step_data
,
301 const unsigned char **inptrp
,
302 const unsigned char *inend
,
303 unsigned char **outptrp
,
304 const unsigned char *outend
,
305 size_t *irreversible
)
307 int flags
= step_data
->__flags
;
308 const unsigned char *inptr
= *inptrp
;
309 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
310 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
314 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
/* UCS4 is big endian, so inptr[0] is the most significant byte.  The
   value exceeds 0x7fffffff exactly when that byte is above 0x7f; the
   previous test against 0x80 let 0x80000000..0x80ffffff through, unlike
   the word-wise loop.  */
316 if (__glibc_unlikely (inptr
[0] > 0x7f))
318 /* The value is too large. We don't try transliteration here since
319 this is not an error because of the lack of possibilities to
320 represent the result. This is a genuine bug in the input since
321 UCS4 does not allow such values. */
322 if (irreversible
== NULL
)
323 /* We are transliterating, don't try to correct anything. */
324 return __GCONV_ILLEGAL_INPUT
;
326 if (flags
& __GCONV_IGNORE_ERRORS
)
328 /* Just ignore this character. */
335 return __GCONV_ILLEGAL_INPUT
;
338 # if __BYTE_ORDER == __LITTLE_ENDIAN
339 outptr
[3] = inptr
[0];
340 outptr
[2] = inptr
[1];
341 outptr
[1] = inptr
[2];
342 outptr
[0] = inptr
[3];
344 outptr
[0] = inptr
[0];
345 outptr
[1] = inptr
[1];
346 outptr
[2] = inptr
[2];
347 outptr
[3] = inptr
[3];
355 /* Determine the status. */
356 if (*inptrp
== inend
)
357 result
= __GCONV_EMPTY_INPUT
;
358 else if (*outptrp
+ 4 > outend
)
359 result
= __GCONV_FULL_OUTPUT
;
361 result
= __GCONV_INCOMPLETE_INPUT
;
/* Finish one UCS4 character whose bytes are split across calls.  The low
   three bits of STATE->__count hold how many bytes are already buffered
   in STATE->__value.__wchb.  Further bytes are taken from *INPTRP until
   four are present; if the input ends first, the new count is saved and
   __GCONV_INCOMPLETE_INPUT is returned.  A complete character is range
   checked through its first byte and then written to *OUTPTRP (reversed
   on little-endian hosts), after which the count bits are cleared.
   NOTE(review): several original lines are missing from this listing.  */
369 __attribute ((always_inline
))
370 ucs4_internal_loop_single (struct __gconv_step
*step
,
371 struct __gconv_step_data
*step_data
,
372 const unsigned char **inptrp
,
373 const unsigned char *inend
,
374 unsigned char **outptrp
,
375 const unsigned char *outend
,
376 size_t *irreversible
)
378 mbstate_t *state
= step_data
->__statep
;
379 int flags
= step_data
->__flags
;
/* Bytes already collected by earlier calls.  */
380 size_t cnt
= state
->__count
& 7;
/* Top the buffer up to four bytes from the new input.  */
382 while (*inptrp
< inend
&& cnt
< 4)
383 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
385 if (__glibc_unlikely (cnt
< 4))
387 /* Still not enough bytes. Store the ones in the input buffer. */
388 state
->__count
&= ~7;
389 state
->__count
|= cnt
;
391 return __GCONV_INCOMPLETE_INPUT
;
/* The first buffered byte is the most significant one of the big-endian
   UCS4 value.  The value exceeds 0x7fffffff exactly when that byte is
   above 0x7f; the previous test against 0x80 accepted
   0x80000000..0x80ffffff.  */
394 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x7f,
397 /* The value is too large. We don't try transliteration here since
398 this is not an error because of the lack of possibilities to
399 represent the result. This is a genuine bug in the input since
400 UCS4 does not allow such values. */
401 if (!(flags
& __GCONV_IGNORE_ERRORS
))
/* Give back the bytes consumed by this call: CNT is the total now
   buffered, the count bits still hold the number buffered on entry.  */
403 *inptrp
-= cnt
- (state
->__count
& 7);
404 return __GCONV_ILLEGAL_INPUT
;
409 #if __BYTE_ORDER == __LITTLE_ENDIAN
410 (*outptrp
)[0] = state
->__value
.__wchb
[3];
411 (*outptrp
)[1] = state
->__value
.__wchb
[2];
412 (*outptrp
)[2] = state
->__value
.__wchb
[1];
413 (*outptrp
)[3] = state
->__value
.__wchb
[0];
414 #elif __BYTE_ORDER == __BIG_ENDIAN
415 (*outptrp
)[0] = state
->__value
.__wchb
[0];
416 (*outptrp
)[1] = state
->__value
.__wchb
[1];
417 (*outptrp
)[2] = state
->__value
.__wchb
[2];
418 (*outptrp
)[3] = state
->__value
.__wchb
[3];
424 /* Clear the state buffer. */
425 state
->__count
&= ~7;
430 #include <iconv/skeleton.c>
433 /* Similarly for the little endian form. */
/* Parameter macros for the internal -> UCS4LE conversion: 4 bytes per
   character on both sides, and internal_ucs4le_loop named for both loop
   slots.  NOTE(review): presumably consumed by the
   `#include <iconv/skeleton.c>' further below; confirm the macro
   meanings in skeleton.c.  */
434 #define DEFINE_INIT 0
435 #define DEFINE_FINI 0
436 #define MIN_NEEDED_FROM 4
437 #define MIN_NEEDED_TO 4
438 #define FROM_DIRECTION 1
439 #define FROM_LOOP internal_ucs4le_loop
440 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
441 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
442 #define ONE_DIRECTION 0
/* Convert whole 4-byte units from the internal format at *INPTRP to
   little-endian UCS4 at *OUTPTRP, limited by the smaller of the two
   buffers.  Big-endian hosts byte-swap each 32-bit word with bswap_32;
   little-endian hosts copy the bytes with __mempcpy.  RESULT then
   records why the conversion stopped.
   NOTE(review): several original lines are missing from this listing.  */
446 __attribute ((always_inline
))
447 internal_ucs4le_loop (struct __gconv_step
*step
,
448 struct __gconv_step_data
*step_data
,
449 const unsigned char **inptrp
, const unsigned char *inend
,
450 unsigned char **outptrp
, const unsigned char *outend
,
451 size_t *irreversible
)
453 const unsigned char *inptr
= *inptrp
;
454 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
455 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
458 #if __BYTE_ORDER == __BIG_ENDIAN
459 /* Sigh, we have to do some real work. */
461 uint32_t *outptr32
= (uint32_t *) outptr
;
463 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
464 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
465 outptr
= (unsigned char *) outptr32
;
469 #elif __BYTE_ORDER == __LITTLE_ENDIAN
470 /* Simply copy the data. */
471 *inptrp
= inptr
+ n_convert
* 4;
472 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
474 # error "This endianess is not supported."
477 /* Determine the status. */
478 if (*inptrp
== inend
)
479 result
= __GCONV_EMPTY_INPUT
;
480 else if (*outptrp
+ 4 > outend
)
481 result
= __GCONV_FULL_OUTPUT
;
483 result
= __GCONV_INCOMPLETE_INPUT
;
/* Byte-wise counterpart of internal_ucs4le_loop, compiled only when
   _STRING_ARCH_unaligned is false.  On big-endian hosts the four bytes
   of each unit are written in reverse order; on little-endian hosts they
   are copied with __mempcpy.  The status is derived from the input side
   first: exhausted input gives __GCONV_EMPTY_INPUT, fewer than 4 input
   bytes left gives __GCONV_INCOMPLETE_INPUT, and the remaining case
   asserts that the output is full before reporting __GCONV_FULL_OUTPUT.
   NOTE(review): several original lines are missing from this listing.  */
488 #if !_STRING_ARCH_unaligned
490 __attribute ((always_inline
))
491 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
492 struct __gconv_step_data
*step_data
,
493 const unsigned char **inptrp
,
494 const unsigned char *inend
,
495 unsigned char **outptrp
,
496 const unsigned char *outend
,
497 size_t *irreversible
)
499 const unsigned char *inptr
= *inptrp
;
500 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
501 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
504 # if __BYTE_ORDER == __BIG_ENDIAN
505 /* Sigh, we have to do some real work. */
508 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
/* Reverse the byte order of one unit.  */
510 outptr
[0] = inptr
[3];
511 outptr
[1] = inptr
[2];
512 outptr
[2] = inptr
[1];
513 outptr
[3] = inptr
[0];
518 # elif __BYTE_ORDER == __LITTLE_ENDIAN
519 /* Simply copy the data. */
520 *inptrp
= inptr
+ n_convert
* 4;
521 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
523 # error "This endianess is not supported."
526 /* Determine the status. */
527 if (*inptrp
== inend
)
528 result
= __GCONV_EMPTY_INPUT
;
529 else if (*inptrp
+ 4 > inend
)
530 result
= __GCONV_INCOMPLETE_INPUT
;
533 assert (*outptrp
+ 4 > outend
);
534 result
= __GCONV_FULL_OUTPUT
;
/* Finish one character whose bytes are split across calls, for the
   internal -> UCS4LE direction.  The low three bits of STATE->__count
   hold how many bytes are already buffered in STATE->__value.__wchb.
   Further bytes are taken from *INPTRP until four are present; if the
   input ends first, the new count is saved and __GCONV_INCOMPLETE_INPUT
   is returned.  Otherwise the four bytes are written to *OUTPTRP
   (reversed on big-endian hosts) and the count bits are cleared.
   NOTE(review): several original lines are missing from this listing.  */
543 __attribute ((always_inline
))
544 internal_ucs4le_loop_single (struct __gconv_step
*step
,
545 struct __gconv_step_data
*step_data
,
546 const unsigned char **inptrp
,
547 const unsigned char *inend
,
548 unsigned char **outptrp
,
549 const unsigned char *outend
,
550 size_t *irreversible
)
552 mbstate_t *state
= step_data
->__statep
;
/* Bytes already collected by earlier calls.  */
553 size_t cnt
= state
->__count
& 7;
/* Top the buffer up to four bytes from the new input.  */
555 while (*inptrp
< inend
&& cnt
< 4)
556 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
558 if (__glibc_unlikely (cnt
< 4))
560 /* Still not enough bytes. Store the ones in the input buffer. */
561 state
->__count
&= ~7;
562 state
->__count
|= cnt
;
564 return __GCONV_INCOMPLETE_INPUT
;
567 #if __BYTE_ORDER == __BIG_ENDIAN
568 (*outptrp
)[0] = state
->__value
.__wchb
[3];
569 (*outptrp
)[1] = state
->__value
.__wchb
[2];
570 (*outptrp
)[2] = state
->__value
.__wchb
[1];
571 (*outptrp
)[3] = state
->__value
.__wchb
[0];
575 (*outptrp
)[0] = state
->__value
.__wchb
[0];
576 (*outptrp
)[1] = state
->__value
.__wchb
[1];
577 (*outptrp
)[2] = state
->__value
.__wchb
[2];
578 (*outptrp
)[3] = state
->__value
.__wchb
[3];
584 /* Clear the state buffer. */
585 state
->__count
&= ~7;
590 #include <iconv/skeleton.c>
593 /* And finally from UCS4-LE to the internal encoding. */
/* Parameter macros for this conversion: 4 bytes per character on both
   sides, and ucs4le_internal_loop named for both loop slots.
   NOTE(review): presumably consumed by the `#include <iconv/skeleton.c>'
   further below; confirm the macro meanings in skeleton.c.  */
594 #define DEFINE_INIT 0
595 #define DEFINE_FINI 0
596 #define MIN_NEEDED_FROM 4
597 #define MIN_NEEDED_TO 4
598 #define FROM_DIRECTION 1
599 #define FROM_LOOP ucs4le_internal_loop
600 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
601 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
602 #define ONE_DIRECTION 0
/* Convert whole 4-byte little-endian UCS4 units from *INPTRP into the
   internal format at *OUTPTRP.  Each unit is read as a uint32_t
   (byte-swapped on big-endian hosts) and rejected when it exceeds
   0x7fffffff: a null IRREVERSIBLE returns __GCONV_ILLEGAL_INPUT at once,
   otherwise FLAGS is tested for __GCONV_IGNORE_ERRORS.  Accepted values
   are stored as uint32_t.  The status is derived from the input side
   first, and the final case asserts that the output is full.
   NOTE(review): several original lines are missing from this listing,
   including the body of the ignore-errors branch.  */
606 __attribute ((always_inline
))
607 ucs4le_internal_loop (struct __gconv_step
*step
,
608 struct __gconv_step_data
*step_data
,
609 const unsigned char **inptrp
, const unsigned char *inend
,
610 unsigned char **outptrp
, const unsigned char *outend
,
611 size_t *irreversible
)
613 int flags
= step_data
->__flags
;
614 const unsigned char *inptr
= *inptrp
;
615 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
616 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
620 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
624 #if __BYTE_ORDER == __BIG_ENDIAN
625 inval
= bswap_32 (*(const uint32_t *) inptr
);
627 inval
= *(const uint32_t *) inptr
;
630 if (__glibc_unlikely (inval
> 0x7fffffff))
632 /* The value is too large. We don't try transliteration here since
633 this is not an error because of the lack of possibilities to
634 represent the result. This is a genuine bug in the input since
635 UCS4 does not allow such values. */
636 if (irreversible
== NULL
)
637 /* We are transliterating, don't try to correct anything. */
638 return __GCONV_ILLEGAL_INPUT
;
640 if (flags
& __GCONV_IGNORE_ERRORS
)
642 /* Just ignore this character. */
649 return __GCONV_ILLEGAL_INPUT
;
652 *((uint32_t *) outptr
) = inval
;
653 outptr
+= sizeof (uint32_t);
659 /* Determine the status. */
660 if (*inptrp
== inend
)
661 result
= __GCONV_EMPTY_INPUT
;
662 else if (*inptrp
+ 4 > inend
)
663 result
= __GCONV_INCOMPLETE_INPUT
;
666 assert (*outptrp
+ 4 > outend
);
667 result
= __GCONV_FULL_OUTPUT
;
/* Byte-wise counterpart of ucs4le_internal_loop, compiled only when
   _STRING_ARCH_unaligned is false.  Each 4-byte little-endian UCS4 unit
   is range checked through its last byte and then copied byte by byte
   into the internal format (reversed on big-endian hosts).  Error
   handling follows ucs4le_internal_loop: a null IRREVERSIBLE returns
   __GCONV_ILLEGAL_INPUT at once, otherwise FLAGS is tested for
   __GCONV_IGNORE_ERRORS.
   NOTE(review): several original lines are missing from this listing.  */
673 #if !_STRING_ARCH_unaligned
675 __attribute ((always_inline
))
676 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
677 struct __gconv_step_data
*step_data
,
678 const unsigned char **inptrp
,
679 const unsigned char *inend
,
680 unsigned char **outptrp
,
681 const unsigned char *outend
,
682 size_t *irreversible
)
684 int flags
= step_data
->__flags
;
685 const unsigned char *inptr
= *inptrp
;
686 unsigned char *outptr
= *outptrp
;
/* Number of complete 4-byte units that fit in both buffers.  */
687 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
691 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
/* In little-endian UCS4 inptr[3] is the most significant byte.  The
   value exceeds 0x7fffffff exactly when that byte is above 0x7f; the
   previous test against 0x80 let 0x80000000..0x80ffffff through, unlike
   the word-wise loop.  */
693 if (__glibc_unlikely (inptr
[3] > 0x7f))
695 /* The value is too large. We don't try transliteration here since
696 this is not an error because of the lack of possibilities to
697 represent the result. This is a genuine bug in the input since
698 UCS4 does not allow such values. */
699 if (irreversible
== NULL
)
700 /* We are transliterating, don't try to correct anything. */
701 return __GCONV_ILLEGAL_INPUT
;
703 if (flags
& __GCONV_IGNORE_ERRORS
)
705 /* Just ignore this character. */
712 return __GCONV_ILLEGAL_INPUT
;
715 # if __BYTE_ORDER == __BIG_ENDIAN
716 outptr
[3] = inptr
[0];
717 outptr
[2] = inptr
[1];
718 outptr
[1] = inptr
[2];
719 outptr
[0] = inptr
[3];
721 outptr
[0] = inptr
[0];
722 outptr
[1] = inptr
[1];
723 outptr
[2] = inptr
[2];
724 outptr
[3] = inptr
[3];
733 /* Determine the status. */
734 if (*inptrp
== inend
)
735 result
= __GCONV_EMPTY_INPUT
;
736 else if (*inptrp
+ 4 > inend
)
737 result
= __GCONV_INCOMPLETE_INPUT
;
740 assert (*outptrp
+ 4 > outend
);
741 result
= __GCONV_FULL_OUTPUT
;
/* Finish one little-endian UCS4 character whose bytes are split across
   calls.  The low three bits of STATE->__count hold how many bytes are
   already buffered in STATE->__value.__wchb.  Further bytes are taken
   from *INPTRP until four are present; if the input ends first, the new
   count is saved and __GCONV_INCOMPLETE_INPUT is returned.  A complete
   character is range checked through its last byte and then written to
   *OUTPTRP (reversed on big-endian hosts), after which the count bits
   are cleared.
   NOTE(review): several original lines are missing from this listing.  */
750 __attribute ((always_inline
))
751 ucs4le_internal_loop_single (struct __gconv_step
*step
,
752 struct __gconv_step_data
*step_data
,
753 const unsigned char **inptrp
,
754 const unsigned char *inend
,
755 unsigned char **outptrp
,
756 const unsigned char *outend
,
757 size_t *irreversible
)
759 mbstate_t *state
= step_data
->__statep
;
760 int flags
= step_data
->__flags
;
/* Bytes already collected by earlier calls.  */
761 size_t cnt
= state
->__count
& 7;
/* Top the buffer up to four bytes from the new input.  */
763 while (*inptrp
< inend
&& cnt
< 4)
764 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
766 if (__glibc_unlikely (cnt
< 4))
768 /* Still not enough bytes. Store the ones in the input buffer. */
769 state
->__count
&= ~7;
770 state
->__count
|= cnt
;
772 return __GCONV_INCOMPLETE_INPUT
;
/* The fourth buffered byte is the most significant one of the
   little-endian UCS4 value.  The value exceeds 0x7fffffff exactly when
   that byte is above 0x7f; the previous test against 0x80 accepted
   0x80000000..0x80ffffff.  */
775 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x7f,
778 /* The value is too large. We don't try transliteration here since
779 this is not an error because of the lack of possibilities to
780 represent the result. This is a genuine bug in the input since
781 UCS4 does not allow such values. */
/* NOTE(review): unlike ucs4_internal_loop_single, *INPTRP is not moved
   back before this return -- confirm whether that is intended.  */
782 if (!(flags
& __GCONV_IGNORE_ERRORS
))
783 return __GCONV_ILLEGAL_INPUT
;
787 #if __BYTE_ORDER == __BIG_ENDIAN
788 (*outptrp
)[0] = state
->__value
.__wchb
[3];
789 (*outptrp
)[1] = state
->__value
.__wchb
[2];
790 (*outptrp
)[2] = state
->__value
.__wchb
[1];
791 (*outptrp
)[3] = state
->__value
.__wchb
[0];
793 (*outptrp
)[0] = state
->__value
.__wchb
[0];
794 (*outptrp
)[1] = state
->__value
.__wchb
[1];
795 (*outptrp
)[2] = state
->__value
.__wchb
[2];
796 (*outptrp
)[3] = state
->__value
.__wchb
[3];
802 /* Clear the state buffer. */
803 state
->__count
&= ~7;
808 #include <iconv/skeleton.c>
811 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
812 #define DEFINE_INIT 0
813 #define DEFINE_FINI 0
814 #define MIN_NEEDED_FROM 1
815 #define MIN_NEEDED_TO 4
816 #define FROM_DIRECTION 1
817 #define FROM_LOOP ascii_internal_loop
818 #define TO_LOOP ascii_internal_loop /* This is not used. */
819 #define FUNCTION_NAME __gconv_transform_ascii_internal
820 #define ONE_DIRECTION 1
822 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
823 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
824 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: a byte above '\x7f' is handed
   to STANDARD_FROM_LOOP_ERR_HANDLER (1); any other byte is widened to a
   uint32_t, stored at outptr, and both pointers advance.
   NOTE(review): the line that opens this macro body is missing from this
   listing; the body is presumably expanded by <iconv/loop.c> -- confirm.  */
827 if (__glibc_unlikely (*inptr > '\x7f')) \
829 /* The value is too large. We don't try transliteration here since \
830 this is not an error because of the lack of possibilities to \
831 represent the result. This is a genuine bug in the input since \
832 ASCII does not allow such values. */ \
833 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
837 /* It's a one-byte sequence. */ \
838 *((uint32_t *) outptr) = *inptr++; \
839 outptr += sizeof (uint32_t); \
842 #define LOOP_NEED_FLAGS
843 #include <iconv/loop.c>
844 #include <iconv/skeleton.c>
847 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
848 #define DEFINE_INIT 0
849 #define DEFINE_FINI 0
850 #define MIN_NEEDED_FROM 4
851 #define MIN_NEEDED_TO 1
852 #define FROM_DIRECTION 1
853 #define FROM_LOOP internal_ascii_loop
854 #define TO_LOOP internal_ascii_loop /* This is not used. */
855 #define FUNCTION_NAME __gconv_transform_internal_ascii
856 #define ONE_DIRECTION 1
858 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
859 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
860 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: a 32-bit input value above
   0x7f goes through UNICODE_TAG_HANDLER and then
   STANDARD_TO_LOOP_ERR_HANDLER (4); any other value is stored as a
   single output byte and inptr advances by one uint32_t.
   NOTE(review): the line that opens this macro body is missing from this
   listing.  */
863 if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
865 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
866 STANDARD_TO_LOOP_ERR_HANDLER (4); \
870 /* It's a one-byte sequence. */ \
871 *outptr++ = *((const uint32_t *) inptr); \
872 inptr += sizeof (uint32_t); \
875 #define LOOP_NEED_FLAGS
876 #include <iconv/loop.c>
877 #include <iconv/skeleton.c>
880 /* Convert from the internal (UCS4-like) format to UTF-8. */
881 #define DEFINE_INIT 0
882 #define DEFINE_FINI 0
883 #define MIN_NEEDED_FROM 4
884 #define MIN_NEEDED_TO 1
885 #define MAX_NEEDED_TO 6
886 #define FROM_DIRECTION 1
887 #define FROM_LOOP internal_utf8_loop
888 #define TO_LOOP internal_utf8_loop /* This is not used. */
889 #define FUNCTION_NAME __gconv_transform_internal_utf8
890 #define ONE_DIRECTION 1
892 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
893 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
894 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
895 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: values below 0x80 are written
   as one byte.  Values up to 0x7fffffff outside 0xd800..0xdfff search
   for a sequence length `step' starting at 2 for which WC has no bits at
   or above position 5 * step + 1, report __GCONV_FULL_OUTPUT when `step'
   bytes do not fit, and emit a lead byte built from ~0xff >> step plus
   trail bytes of the form 0x80 | (wc & 0x3f).  Everything else goes to
   STANDARD_TO_LOOP_ERR_HANDLER (4).
   NOTE(review): many lines of this macro body are missing from this
   listing, including the line that opens it.  */
898 uint32_t wc = *((const uint32_t *) inptr); \
900 if (__glibc_likely (wc < 0x80)) \
901 /* It's a one-byte sequence. */ \
902 *outptr++ = (unsigned char) wc; \
903 else if (__glibc_likely (wc <= 0x7fffffff \
904 && (wc < 0xd800 || wc > 0xdfff))) \
907 unsigned char *start; \
909 for (step = 2; step < 6; ++step) \
910 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
913 if (__glibc_unlikely (outptr + step > outend)) \
916 result = __GCONV_FULL_OUTPUT; \
921 *outptr = (unsigned char) (~0xff >> step); \
925 start[--step] = 0x80 | (wc & 0x3f); \
933 STANDARD_TO_LOOP_ERR_HANDLER (4); \
938 #define LOOP_NEED_FLAGS
939 #include <iconv/loop.c>
940 #include <iconv/skeleton.c>
943 /* Convert from UTF-8 to the internal (UCS4-like) format. */
944 #define DEFINE_INIT 0
945 #define DEFINE_FINI 0
946 #define MIN_NEEDED_FROM 1
947 #define MAX_NEEDED_FROM 6
948 #define MIN_NEEDED_TO 4
949 #define FROM_DIRECTION 1
950 #define FROM_LOOP utf8_internal_loop
951 #define TO_LOOP utf8_internal_loop /* This is not used. */
952 #define FUNCTION_NAME __gconv_transform_utf8_internal
953 #define ONE_DIRECTION 1
955 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
956 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
957 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
958 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: bytes below 0x80 are one-byte
   sequences; otherwise the lead byte selects a sequence of two
   (0xc2..0xdf) to six (0xfc/0xfd) bytes.  Any other lead byte skips
   forward over bytes of the form 10xxxxxx and calls
   STANDARD_FROM_LOOP_ERR_HANDLER (i).  When fewer than `cnt' bytes are
   available the trail bytes present are checked before
   __GCONV_INCOMPLETE_INPUT is reported.  Bad trail bytes, encodings
   longer than necessary and values in 0xd800..0xdfff are treated as
   illegal; accepted values are stored as uint32_t.
   NOTE(review): many lines of this macro body are missing from this
   listing, including the line that opens it.  */
961 /* Next input byte. */ \
962 uint32_t ch = *inptr; \
964 if (__glibc_likely (ch < 0x80)) \
966 /* One byte sequence. */ \
974 if (ch >= 0xc2 && ch < 0xe0) \
976 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
977 otherwise the wide character could have been represented \
978 using a single byte. */ \
982 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
984 /* We expect three bytes. */ \
988 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
990 /* We expect four bytes. */ \
994 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
996 /* We expect five bytes. */ \
1000 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
1002 /* We expect six bytes. */ \
1008 /* Search the end of this ill-formed UTF-8 character. This \
1009 is the next byte with (x & 0xc0) != 0x80. */ \
1013 while (inptr + i < inend \
1014 && (*(inptr + i) & 0xc0) == 0x80 \
1018 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1021 if (__glibc_unlikely (inptr + cnt > inend)) \
1023 /* We don't have enough input. But before we report that check \
1024 that all the bytes are correct. */ \
1025 for (i = 1; inptr + i < inend; ++i) \
1026 if ((inptr[i] & 0xc0) != 0x80) \
1029 if (__glibc_likely (inptr + i == inend)) \
1031 result = __GCONV_INCOMPLETE_INPUT; \
1038 /* Read the possible remaining bytes. */ \
1039 for (i = 1; i < cnt; ++i) \
1041 uint32_t byte = inptr[i]; \
1043 if ((byte & 0xc0) != 0x80) \
1044 /* This is an illegal encoding. */ \
1048 ch |= byte & 0x3f; \
1051 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1052 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1053 have been represented with fewer than cnt bytes. */ \
1054 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1055 /* Do not accept UTF-16 surrogates. */ \
1056 || (ch >= 0xd800 && ch <= 0xdfff)) \
1058 /* This is an illegal encoding. */ \
1065 /* Now adjust the pointers and store the result. */ \
1066 *((uint32_t *) outptr) = ch; \
1067 outptr += sizeof (uint32_t); \
1069 #define LOOP_NEED_FLAGS
/* STORE_REST saves an incomplete sequence in the conversion state:
   state->__count first receives the number of input bytes available and
   later has the expected total length OR-ed in shifted left by 8, while
   state->__value.__wch receives the partially decoded value.  */
1071 #define STORE_REST \
1073 /* We store the remaining bytes while converting them into the UCS4 \
1074 format. We can assume that the first byte in the buffer is \
1075 correct and that it requires a larger number of bytes than there \
1076 are in the input buffer. */ \
1077 wint_t ch = **inptrp; \
1080 state->__count = inend - *inptrp; \
1082 assert (ch != 0xc0 && ch != 0xc1); \
1083 if (ch >= 0xc2 && ch < 0xe0) \
1085 /* We expect two bytes. The first byte cannot be 0xc0 or \
1086 0xc1, otherwise the wide character could have been \
1087 represented using a single byte. */ \
1091 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
1093 /* We expect three bytes. */ \
1097 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
1099 /* We expect four bytes. */ \
1103 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
1105 /* We expect five bytes. */ \
1111 /* We expect six bytes. */ \
1116 /* The first byte is already consumed. */ \
1118 while (++(*inptrp) < inend) \
1121 ch |= **inptrp & 0x3f; \
1125 /* Shift for the so far missing bytes. */ \
1128 /* Store the number of bytes expected for the entire sequence. */ \
1129 state->__count |= cnt << 8; \
1131 /* Store the value. */ \
1132 state->__value.__wch = ch; \
/* UNPACK_BYTES rebuilds the saved bytes into bytebuf: the expected total
   length is read from state->__count >> 8, the number of saved bytes
   from its low 8 bits, and the lead byte is seeded from inmask, which is
   indexed by total length minus 2.  */
1135 #define UNPACK_BYTES \
1137 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1138 wint_t wch = state->__value.__wch; \
1139 size_t ntotal = state->__count >> 8; \
1141 inlen = state->__count & 255; \
1143 bytebuf[0] = inmask[ntotal - 2]; \
1147 if (--ntotal < inlen) \
1148 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1151 while (ntotal > 1); \
1153 bytebuf[0] |= wch; \
1156 #define CLEAR_STATE \
1160 #include <iconv/loop.c>
1161 #include <iconv/skeleton.c>
1164 /* Convert from UCS2 to the internal (UCS4-like) format. */
1165 #define DEFINE_INIT 0
1166 #define DEFINE_FINI 0
1167 #define MIN_NEEDED_FROM 2
1168 #define MIN_NEEDED_TO 4
1169 #define FROM_DIRECTION 1
1170 #define FROM_LOOP ucs2_internal_loop
1171 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1172 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1173 #define ONE_DIRECTION 1
1175 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1176 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1177 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: one 16-bit unit is read with
   get16; values in 0xd800..0xdfff go to
   STANDARD_FROM_LOOP_ERR_HANDLER (2), every other value is stored
   widened to uint32_t.
   NOTE(review): some lines of this macro body are missing from this
   listing, including the line that opens it.  */
1180 uint16_t u1 = get16 (inptr); \
1182 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1184 /* Surrogate characters in UCS-2 input are not valid. Reject \
1185 them. (Catching this here is not security relevant.) */ \
1186 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1189 *((uint32_t *) outptr) = u1; \
1190 outptr += sizeof (uint32_t); \
1193 #define LOOP_NEED_FLAGS
1194 #include <iconv/loop.c>
1195 #include <iconv/skeleton.c>
1198 /* Convert from the internal (UCS4-like) format to UCS2. */
1199 #define DEFINE_INIT 0
1200 #define DEFINE_FINI 0
1201 #define MIN_NEEDED_FROM 4
1202 #define MIN_NEEDED_TO 2
1203 #define FROM_DIRECTION 1
1204 #define FROM_LOOP internal_ucs2_loop
1205 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1206 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1207 #define ONE_DIRECTION 1
1209 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1210 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1211 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: values of 0x10000 and above go
   through UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER (4);
   values in 0xd800..0xdfff set __GCONV_ILLEGAL_INPUT and consult
   ignore_errors_p (); all other values are written with put16.
   NOTE(review): some lines of this macro body are missing from this
   listing, including the line that opens it and the end of the
   surrogate comment.  */
1214 uint32_t val = *((const uint32_t *) inptr); \
1216 if (__glibc_unlikely (val >= 0x10000)) \
1218 UNICODE_TAG_HANDLER (val, 4); \
1219 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1221 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1223 /* Surrogate characters in UCS-4 input are not valid. \
1224 We must catch this, because the UCS-2 output might be \
1225 interpreted as UTF-16 by other programs. If we let \
1226 surrogates pass through, attackers could make a security \
1227 hole exploit by synthesizing any desired plane 1-16 \
1229 result = __GCONV_ILLEGAL_INPUT; \
1230 if (! ignore_errors_p ()) \
1238 put16 (outptr, val); \
1239 outptr += sizeof (uint16_t); \
1243 #define LOOP_NEED_FLAGS
1244 #include <iconv/loop.c>
1245 #include <iconv/skeleton.c>
1248 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1249 #define DEFINE_INIT 0
1250 #define DEFINE_FINI 0
1251 #define MIN_NEEDED_FROM 2
1252 #define MIN_NEEDED_TO 4
1253 #define FROM_DIRECTION 1
1254 #define FROM_LOOP ucs2reverse_internal_loop
1255 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1256 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1257 #define ONE_DIRECTION 1
1259 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1260 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1261 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: one 16-bit unit is read with
   get16 and byte-swapped with bswap_16; values in 0xd800..0xdfff consult
   ignore_errors_p () and set __GCONV_ILLEGAL_INPUT, every other value is
   stored widened to uint32_t.
   NOTE(review): some lines of this macro body are missing from this
   listing, including the line that opens it.  */
1264 uint16_t u1 = bswap_16 (get16 (inptr)); \
1266 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1268 /* Surrogate characters in UCS-2 input are not valid. Reject \
1269 them. (Catching this here is not security relevant.) */ \
1270 if (! ignore_errors_p ()) \
1272 result = __GCONV_ILLEGAL_INPUT; \
1280 *((uint32_t *) outptr) = u1; \
1281 outptr += sizeof (uint32_t); \
1284 #define LOOP_NEED_FLAGS
1285 #include <iconv/loop.c>
1286 #include <iconv/skeleton.c>
1289 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1290 #define DEFINE_INIT 0
1291 #define DEFINE_FINI 0
1292 #define MIN_NEEDED_FROM 4
1293 #define MIN_NEEDED_TO 2
1294 #define FROM_DIRECTION 1
1295 #define FROM_LOOP internal_ucs2reverse_loop
1296 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1297 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1298 #define ONE_DIRECTION 1
1300 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1301 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1302 #define LOOPFCT FROM_LOOP
/* Visible part of the per-character body: values of 0x10000 and above go
   through UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER (4);
   values in 0xd800..0xdfff consult ignore_errors_p () and set
   __GCONV_ILLEGAL_INPUT; all other values are byte-swapped with bswap_16
   and written with put16.
   NOTE(review): some lines of this macro body are missing from this
   listing, including the line that opens it and the end of the
   surrogate comment.  */
1305 uint32_t val = *((const uint32_t *) inptr); \
1306 if (__glibc_unlikely (val >= 0x10000)) \
1308 UNICODE_TAG_HANDLER (val, 4); \
1309 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1311 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1313 /* Surrogate characters in UCS-4 input are not valid. \
1314 We must catch this, because the UCS-2 output might be \
1315 interpreted as UTF-16 by other programs. If we let \
1316 surrogates pass through, attackers could make a security \
1317 hole exploit by synthesizing any desired plane 1-16 \
1319 if (! ignore_errors_p ()) \
1321 result = __GCONV_ILLEGAL_INPUT; \
1330 put16 (outptr, bswap_16 (val)); \
1331 outptr += sizeof (uint16_t); \
1335 #define LOOP_NEED_FLAGS
1336 #include <iconv/loop.c>
1337 #include <iconv/skeleton.c>