1 /* Simple transformations functions.
2 Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
28 #include <sys/param.h>
29 #include <gconv_int.h>
31 #define BUILTIN_ALIAS(s1, s2) /* nothing */
32 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
33 MinF, MaxF, MinT, MaxT) \
34 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
35 const unsigned char **, const unsigned char *, \
36 unsigned char **, size_t *, int, int);
37 #include "gconv_builtin.h"
41 # define EILSEQ EINVAL
45 /* Specialized conversion function for a single byte to INTERNAL, recognizing
46 only ASCII characters. */
48 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal UCS4 format and the real UCS4
   format is, if any, the endianness.  Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.  */
64 #define MIN_NEEDED_FROM 4
65 #define MIN_NEEDED_TO 4
66 #define FROM_DIRECTION 1
67 #define FROM_LOOP internal_ucs4_loop
68 #define TO_LOOP internal_ucs4_loop /* This is not used. */
69 #define FUNCTION_NAME __gconv_transform_internal_ucs4
70 #define ONE_DIRECTION 0
74 __attribute ((always_inline
))
75 internal_ucs4_loop (struct __gconv_step
*step
,
76 struct __gconv_step_data
*step_data
,
77 const unsigned char **inptrp
, const unsigned char *inend
,
78 unsigned char **outptrp
, const unsigned char *outend
,
81 const unsigned char *inptr
= *inptrp
;
82 unsigned char *outptr
= *outptrp
;
83 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
86 #if __BYTE_ORDER == __LITTLE_ENDIAN
87 /* Sigh, we have to do some real work. */
89 uint32_t *outptr32
= (uint32_t *) outptr
;
91 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
92 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
95 *outptrp
= (unsigned char *) outptr32
;
96 #elif __BYTE_ORDER == __BIG_ENDIAN
97 /* Simply copy the data. */
98 *inptrp
= inptr
+ n_convert
* 4;
99 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
101 # error "This endianess is not supported."
104 /* Determine the status. */
105 if (*inptrp
== inend
)
106 result
= __GCONV_EMPTY_INPUT
;
107 else if (*outptrp
+ 4 > outend
)
108 result
= __GCONV_FULL_OUTPUT
;
110 result
= __GCONV_INCOMPLETE_INPUT
;
#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that only performs byte accesses, so it
   places no alignment requirement on the buffers.  Converts as many
   complete 4-byte characters as fit in both buffers from the internal
   (host byte order) form to big-endian UCS4 and advances *INPTRP and
   *OUTPTRP.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const stop = src + nchars * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both representations are identical: a plain copy suffices.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
/* Finish a character whose first bytes were saved in the conversion state
   by an earlier call.  The low three bits of STEP_DATA->__statep->__count
   hold the number of bytes already saved in __value.__wchb.  Bytes are
   taken from *INPTRP until four are available; the character is then
   written to *OUTPTRP in big-endian order and the saved count is cleared.
   STEP, OUTEND and IRREVERSIBLE are not used.

   Returns __GCONV_INCOMPLETE_INPUT (after recording the new byte count in
   the state) when the input ends before four bytes are collected, and
   __GCONV_OK otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *out;
  int i;

  /* Top the saved bytes up from the fresh input.  */
  for (; have < 4 && *inptrp < inend; ++have, ++*inptrp)
    state->__value.__wchb[have] = **inptrp;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not a whole character; remember how many bytes we hold.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  out = *outptrp;
#if __BYTE_ORDER == __LITTLE_ENDIAN
  for (i = 0; i < 4; ++i)
    out[i] = state->__value.__wchb[3 - i];
#elif __BYTE_ORDER == __BIG_ENDIAN
  for (i = 0; i < 4; ++i)
    out[i] = state->__value.__wchb[i];
#else
# error "This endianess is not supported."
#endif
  *outptrp = out + 4;

  /* The state buffer is empty again.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
214 #include <iconv/skeleton.c>
217 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
218 for the other direction we have to check for correct values here. */
219 #define DEFINE_INIT 0
220 #define DEFINE_FINI 0
221 #define MIN_NEEDED_FROM 4
222 #define MIN_NEEDED_TO 4
223 #define FROM_DIRECTION 1
224 #define FROM_LOOP ucs4_internal_loop
225 #define TO_LOOP ucs4_internal_loop /* This is not used. */
226 #define FUNCTION_NAME __gconv_transform_ucs4_internal
227 #define ONE_DIRECTION 0
231 __attribute ((always_inline
))
232 ucs4_internal_loop (struct __gconv_step
*step
,
233 struct __gconv_step_data
*step_data
,
234 const unsigned char **inptrp
, const unsigned char *inend
,
235 unsigned char **outptrp
, const unsigned char *outend
,
236 size_t *irreversible
)
238 int flags
= step_data
->__flags
;
239 const unsigned char *inptr
= *inptrp
;
240 unsigned char *outptr
= *outptrp
;
243 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
247 #if __BYTE_ORDER == __LITTLE_ENDIAN
248 inval
= bswap_32 (*(const uint32_t *) inptr
);
250 inval
= *(const uint32_t *) inptr
;
253 if (__glibc_unlikely (inval
> 0x7fffffff))
255 /* The value is too large. We don't try transliteration here since
256 this is not an error because of the lack of possibilities to
257 represent the result. This is a genuine bug in the input since
258 UCS4 does not allow such values. */
259 if (irreversible
== NULL
)
260 /* We are transliterating, don't try to correct anything. */
261 return __GCONV_ILLEGAL_INPUT
;
263 if (flags
& __GCONV_IGNORE_ERRORS
)
265 /* Just ignore this character. */
272 return __GCONV_ILLEGAL_INPUT
;
275 *((uint32_t *) outptr
) = inval
;
276 outptr
+= sizeof (uint32_t);
282 /* Determine the status. */
283 if (*inptrp
== inend
)
284 result
= __GCONV_EMPTY_INPUT
;
285 else if (*outptrp
+ 4 > outend
)
286 result
= __GCONV_FULL_OUTPUT
;
288 result
= __GCONV_INCOMPLETE_INPUT
;
#if !_STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop that only performs byte accesses, so it
   places no alignment requirement on the buffers.  Converts big-endian
   UCS4 input to the internal (host byte order) form one 4-byte character
   at a time.

   A character is illegal when its value exceeds 0x7fffffff, i.e. when the
   most significant (first) input byte is larger than 0x7f.  This matches
   the `inval > 0x7fffffff' test of ucs4_internal_loop; comparing against
   0x80 would let values 0x80000000..0x80ffffff through.  For an illegal
   character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       without updating *INPTRP and *OUTPTRP;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the character
       is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP and *OUTPTRP are set to the current positions and
       __GCONV_ILLEGAL_INPUT is returned.
   STEP is not used.

   When the loop ends normally the pointers are updated and the result is
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four bytes of output space remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* inptr[0] is the most significant byte of the big-endian value.  */
      if (__glibc_unlikely (inptr[0] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Finish a big-endian UCS4 character whose first bytes were saved in the
   conversion state by an earlier call.  The low three bits of
   STEP_DATA->__statep->__count hold the number of bytes already saved in
   __value.__wchb.  Bytes are taken from *INPTRP until four are available.
   STEP, OUTEND and IRREVERSIBLE are not used.

   Returns:
     - __GCONV_INCOMPLETE_INPUT (after recording the new byte count in the
       state) when the input ends before four bytes are collected;
     - __GCONV_ILLEGAL_INPUT when the character exceeds 0x7fffffff, i.e.
       its most significant (first) byte is larger than 0x7f, and
       __GCONV_IGNORE_ERRORS is not set in STEP_DATA->__flags.  *INPTRP is
       moved back over the bytes this call consumed and the state is left
       untouched.  The bound matches the `inval > 0x7fffffff' test of
       ucs4_internal_loop; comparing against 0x80 would let values
       0x80000000..0x80ffffff through;
     - __GCONV_OK otherwise.  A legal character is written to *OUTPTRP in
       host byte order; an illegal one with __GCONV_IGNORE_ERRORS set is
       dropped.  In both cases the saved byte count is cleared.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* __wchb[0] is the most significant byte of the big-endian value.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* state->__count still holds the old count, so the difference
             is the number of input bytes taken by this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
425 #include <iconv/skeleton.c>
428 /* Similarly for the little endian form. */
429 #define DEFINE_INIT 0
430 #define DEFINE_FINI 0
431 #define MIN_NEEDED_FROM 4
432 #define MIN_NEEDED_TO 4
433 #define FROM_DIRECTION 1
434 #define FROM_LOOP internal_ucs4le_loop
435 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
436 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
437 #define ONE_DIRECTION 0
/* Convert as many complete 4-byte characters as fit in both buffers from
   the internal (host byte order) form to little-endian UCS4.  Input and
   output are accessed as uint32_t objects.  *INPTRP and *OUTPTRP are
   advanced past the converted data.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole characters available on the input side and storable on the
     output side.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order is the reverse of UCS4LE order: swap every word.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = nchars;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both representations are identical: a plain copy suffices.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
#else
# error "This endianess is not supported."
#endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that only performs byte accesses, so it
   places no alignment requirement on the buffers.  Converts as many
   complete 4-byte characters as fit in both buffers from the internal
   (host byte order) form to little-endian UCS4 and advances *INPTRP and
   *OUTPTRP.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain, and
   __GCONV_FULL_OUTPUT otherwise (asserting that the output really has
   fewer than four bytes of space left).  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const stop = src + nchars * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both representations are identical: a plain copy suffices.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
/* Finish a character whose first bytes were saved in the conversion state
   by an earlier call.  The low three bits of STEP_DATA->__statep->__count
   hold the number of bytes already saved in __value.__wchb.  Bytes are
   taken from *INPTRP until four are available; the character is then
   written to *OUTPTRP in little-endian order and the saved count is
   cleared.  STEP, OUTEND and IRREVERSIBLE are not used.

   Returns __GCONV_INCOMPLETE_INPUT (after recording the new byte count in
   the state) when the input ends before four bytes are collected, and
   __GCONV_OK otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *out;
  int i;

  /* Top the saved bytes up from the fresh input.  */
  for (; have < 4 && *inptrp < inend; ++have, ++*inptrp)
    state->__value.__wchb[have] = **inptrp;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not a whole character; remember how many bytes we hold.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  out = *outptrp;
#if __BYTE_ORDER == __BIG_ENDIAN
  for (i = 0; i < 4; ++i)
    out[i] = state->__value.__wchb[3 - i];
#else
  for (i = 0; i < 4; ++i)
    out[i] = state->__value.__wchb[i];
#endif
  *outptrp = out + 4;

  /* The state buffer is empty again.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
585 #include <iconv/skeleton.c>
588 /* And finally from UCS4-LE to the internal encoding. */
589 #define DEFINE_INIT 0
590 #define DEFINE_FINI 0
591 #define MIN_NEEDED_FROM 4
592 #define MIN_NEEDED_TO 4
593 #define FROM_DIRECTION 1
594 #define FROM_LOOP ucs4le_internal_loop
595 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
596 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
597 #define ONE_DIRECTION 0
601 __attribute ((always_inline
))
602 ucs4le_internal_loop (struct __gconv_step
*step
,
603 struct __gconv_step_data
*step_data
,
604 const unsigned char **inptrp
, const unsigned char *inend
,
605 unsigned char **outptrp
, const unsigned char *outend
,
606 size_t *irreversible
)
608 int flags
= step_data
->__flags
;
609 const unsigned char *inptr
= *inptrp
;
610 unsigned char *outptr
= *outptrp
;
613 for (; inptr
+ 4 <= inend
&& outptr
+ 4 <= outend
; inptr
+= 4)
617 #if __BYTE_ORDER == __BIG_ENDIAN
618 inval
= bswap_32 (*(const uint32_t *) inptr
);
620 inval
= *(const uint32_t *) inptr
;
623 if (__glibc_unlikely (inval
> 0x7fffffff))
625 /* The value is too large. We don't try transliteration here since
626 this is not an error because of the lack of possibilities to
627 represent the result. This is a genuine bug in the input since
628 UCS4 does not allow such values. */
629 if (irreversible
== NULL
)
630 /* We are transliterating, don't try to correct anything. */
631 return __GCONV_ILLEGAL_INPUT
;
633 if (flags
& __GCONV_IGNORE_ERRORS
)
635 /* Just ignore this character. */
642 return __GCONV_ILLEGAL_INPUT
;
645 *((uint32_t *) outptr
) = inval
;
646 outptr
+= sizeof (uint32_t);
652 /* Determine the status. */
653 if (*inptrp
== inend
)
654 result
= __GCONV_EMPTY_INPUT
;
655 else if (*inptrp
+ 4 > inend
)
656 result
= __GCONV_INCOMPLETE_INPUT
;
659 assert (*outptrp
+ 4 > outend
);
660 result
= __GCONV_FULL_OUTPUT
;
#if !_STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop that only performs byte accesses, so it
   places no alignment requirement on the buffers.  Converts little-endian
   UCS4 input to the internal (host byte order) form one 4-byte character
   at a time.

   A character is illegal when its value exceeds 0x7fffffff, i.e. when the
   most significant (last) input byte is larger than 0x7f.  This matches
   the `inval > 0x7fffffff' test of ucs4le_internal_loop; comparing
   against 0x80 would let values 0x80000000..0x80ffffff through.  For an
   illegal character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       without updating *INPTRP and *OUTPTRP;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the character
       is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP and *OUTPTRP are set to the current positions and
       __GCONV_ILLEGAL_INPUT is returned.
   STEP is not used.

   When the loop ends normally the pointers are updated and the result is
   __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain, and
   __GCONV_FULL_OUTPUT otherwise (asserting that the output really has
   fewer than four bytes of space left).  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* inptr[3] is the most significant byte of the little-endian
         value.  */
      if (__glibc_unlikely (inptr[3] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
/* Finish a little-endian UCS4 character whose first bytes were saved in
   the conversion state by an earlier call.  The low three bits of
   STEP_DATA->__statep->__count hold the number of bytes already saved in
   __value.__wchb.  Bytes are taken from *INPTRP until four are available.
   STEP, OUTEND and IRREVERSIBLE are not used.

   Returns:
     - __GCONV_INCOMPLETE_INPUT (after recording the new byte count in the
       state) when the input ends before four bytes are collected;
     - __GCONV_ILLEGAL_INPUT when the character exceeds 0x7fffffff, i.e.
       its most significant (last) byte is larger than 0x7f, and
       __GCONV_IGNORE_ERRORS is not set in STEP_DATA->__flags.  *INPTRP is
       moved back over the bytes this call consumed, as
       ucs4_internal_loop_single does, and the state is left untouched.
       The bound matches the `inval > 0x7fffffff' test of
       ucs4le_internal_loop; comparing against 0x80 would let values
       0x80000000..0x80ffffff through;
     - __GCONV_OK otherwise.  A legal character is written to *OUTPTRP in
       host byte order; an illegal one with __GCONV_IGNORE_ERRORS set is
       dropped.  In both cases the saved byte count is cleared.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* __wchb[3] is the most significant byte of the little-endian value.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* state->__count still holds the old count, so the difference
             is the number of input bytes taken by this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
799 #include <iconv/skeleton.c>
802 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
803 #define DEFINE_INIT 0
804 #define DEFINE_FINI 0
805 #define MIN_NEEDED_FROM 1
806 #define MIN_NEEDED_TO 4
807 #define FROM_DIRECTION 1
808 #define FROM_LOOP ascii_internal_loop
809 #define TO_LOOP ascii_internal_loop /* This is not used. */
810 #define FUNCTION_NAME __gconv_transform_ascii_internal
811 #define ONE_DIRECTION 1
813 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
814 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
815 #define LOOPFCT FROM_LOOP
818 if (__glibc_unlikely (*inptr > '\x7f')) \
820 /* The value is too large. We don't try transliteration here since \
821 this is not an error because of the lack of possibilities to \
822 represent the result. This is a genuine bug in the input since \
823 ASCII does not allow such values. */ \
824 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
828 /* It's an one byte sequence. */ \
829 *((uint32_t *) outptr) = *inptr++; \
830 outptr += sizeof (uint32_t); \
833 #define LOOP_NEED_FLAGS
834 #include <iconv/loop.c>
835 #include <iconv/skeleton.c>
838 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
839 #define DEFINE_INIT 0
840 #define DEFINE_FINI 0
841 #define MIN_NEEDED_FROM 4
842 #define MIN_NEEDED_TO 1
843 #define FROM_DIRECTION 1
844 #define FROM_LOOP internal_ascii_loop
845 #define TO_LOOP internal_ascii_loop /* This is not used. */
846 #define FUNCTION_NAME __gconv_transform_internal_ascii
847 #define ONE_DIRECTION 1
849 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
850 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
851 #define LOOPFCT FROM_LOOP
854 if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
856 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
857 STANDARD_TO_LOOP_ERR_HANDLER (4); \
861 /* It's an one byte sequence. */ \
862 *outptr++ = *((const uint32_t *) inptr); \
863 inptr += sizeof (uint32_t); \
866 #define LOOP_NEED_FLAGS
867 #include <iconv/loop.c>
868 #include <iconv/skeleton.c>
871 /* Convert from the internal (UCS4-like) format to UTF-8. */
872 #define DEFINE_INIT 0
873 #define DEFINE_FINI 0
874 #define MIN_NEEDED_FROM 4
875 #define MIN_NEEDED_TO 1
876 #define MAX_NEEDED_TO 6
877 #define FROM_DIRECTION 1
878 #define FROM_LOOP internal_utf8_loop
879 #define TO_LOOP internal_utf8_loop /* This is not used. */
880 #define FUNCTION_NAME __gconv_transform_internal_utf8
881 #define ONE_DIRECTION 1
883 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
884 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
885 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
886 #define LOOPFCT FROM_LOOP
889 uint32_t wc = *((const uint32_t *) inptr); \
891 if (__glibc_likely (wc < 0x80)) \
892 /* It's an one byte sequence. */ \
893 *outptr++ = (unsigned char) wc; \
894 else if (__glibc_likely (wc <= 0x7fffffff \
895 && (wc < 0xd800 || wc > 0xdfff))) \
898 unsigned char *start; \
900 for (step = 2; step < 6; ++step) \
901 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
904 if (__glibc_unlikely (outptr + step > outend)) \
907 result = __GCONV_FULL_OUTPUT; \
912 *outptr = (unsigned char) (~0xff >> step); \
916 start[--step] = 0x80 | (wc & 0x3f); \
924 STANDARD_TO_LOOP_ERR_HANDLER (4); \
929 #define LOOP_NEED_FLAGS
930 #include <iconv/loop.c>
931 #include <iconv/skeleton.c>
934 /* Convert from UTF-8 to the internal (UCS4-like) format. */
935 #define DEFINE_INIT 0
936 #define DEFINE_FINI 0
937 #define MIN_NEEDED_FROM 1
938 #define MAX_NEEDED_FROM 6
939 #define MIN_NEEDED_TO 4
940 #define FROM_DIRECTION 1
941 #define FROM_LOOP utf8_internal_loop
942 #define TO_LOOP utf8_internal_loop /* This is not used. */
943 #define FUNCTION_NAME __gconv_transform_utf8_internal
944 #define ONE_DIRECTION 1
946 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
947 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
948 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
949 #define LOOPFCT FROM_LOOP
952 /* Next input byte. */ \
953 uint32_t ch = *inptr; \
955 if (__glibc_likely (ch < 0x80)) \
957 /* One byte sequence. */ \
965 if (ch >= 0xc2 && ch < 0xe0) \
967 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
968 otherwise the wide character could have been represented \
969 using a single byte. */ \
973 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
975 /* We expect three bytes. */ \
979 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
981 /* We expect four bytes. */ \
985 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
987 /* We expect five bytes. */ \
991 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
993 /* We expect six bytes. */ \
999 /* Search the end of this ill-formed UTF-8 character. This \
1000 is the next byte with (x & 0xc0) != 0x80. */ \
1004 while (inptr + i < inend \
1005 && (*(inptr + i) & 0xc0) == 0x80 \
1009 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1012 if (__glibc_unlikely (inptr + cnt > inend)) \
1014 /* We don't have enough input. But before we report that check \
1015 that all the bytes are correct. */ \
1016 for (i = 1; inptr + i < inend; ++i) \
1017 if ((inptr[i] & 0xc0) != 0x80) \
1020 if (__glibc_likely (inptr + i == inend)) \
1022 result = __GCONV_INCOMPLETE_INPUT; \
1029 /* Read the possible remaining bytes. */ \
1030 for (i = 1; i < cnt; ++i) \
1032 uint32_t byte = inptr[i]; \
1034 if ((byte & 0xc0) != 0x80) \
1035 /* This is an illegal encoding. */ \
1039 ch |= byte & 0x3f; \
1042 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1043 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1044 have been represented with fewer than cnt bytes. */ \
1045 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1046 /* Do not accept UTF-16 surrogates. */ \
1047 || (ch >= 0xd800 && ch <= 0xdfff)) \
1049 /* This is an illegal encoding. */ \
1056 /* Now adjust the pointers and store the result. */ \
1057 *((uint32_t *) outptr) = ch; \
1058 outptr += sizeof (uint32_t); \
1060 #define LOOP_NEED_FLAGS
1062 #define STORE_REST \
1064 /* We store the remaining bytes while converting them into the UCS4 \
1065 format. We can assume that the first byte in the buffer is \
1066 correct and that it requires a larger number of bytes than there \
1067 are in the input buffer. */ \
1068 wint_t ch = **inptrp; \
1071 state->__count = inend - *inptrp; \
1073 assert (ch != 0xc0 && ch != 0xc1); \
1074 if (ch >= 0xc2 && ch < 0xe0) \
1076 /* We expect two bytes. The first byte cannot be 0xc0 or \
1077 0xc1, otherwise the wide character could have been \
1078 represented using a single byte. */ \
1082 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
1084 /* We expect three bytes. */ \
1088 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
1090 /* We expect four bytes. */ \
1094 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
1096 /* We expect five bytes. */ \
1102 /* We expect six bytes. */ \
1107 /* The first byte is already consumed. */ \
1109 while (++(*inptrp) < inend) \
1112 ch |= **inptrp & 0x3f; \
1116 /* Shift for the so far missing bytes. */ \
1119 /* Store the number of bytes expected for the entire sequence. */ \
1120 state->__count |= cnt << 8; \
1122 /* Store the value. */ \
1123 state->__value.__wch = ch; \
1126 #define UNPACK_BYTES \
1128 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1129 wint_t wch = state->__value.__wch; \
1130 size_t ntotal = state->__count >> 8; \
1132 inlen = state->__count & 255; \
1134 bytebuf[0] = inmask[ntotal - 2]; \
1138 if (--ntotal < inlen) \
1139 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1142 while (ntotal > 1); \
1144 bytebuf[0] |= wch; \
1147 #define CLEAR_STATE \
1151 #include <iconv/loop.c>
1152 #include <iconv/skeleton.c>
1155 /* Convert from UCS2 to the internal (UCS4-like) format. */
1156 #define DEFINE_INIT 0
1157 #define DEFINE_FINI 0
1158 #define MIN_NEEDED_FROM 2
1159 #define MIN_NEEDED_TO 4
1160 #define FROM_DIRECTION 1
1161 #define FROM_LOOP ucs2_internal_loop
1162 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1163 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1164 #define ONE_DIRECTION 1
1166 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1167 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1168 #define LOOPFCT FROM_LOOP
1171 uint16_t u1 = get16 (inptr); \
1173 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1175 /* Surrogate characters in UCS-2 input are not valid. Reject \
1176 them. (Catching this here is not security relevant.) */ \
1177 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1180 *((uint32_t *) outptr) = u1; \
1181 outptr += sizeof (uint32_t); \
1184 #define LOOP_NEED_FLAGS
1185 #include <iconv/loop.c>
1186 #include <iconv/skeleton.c>
1189 /* Convert from the internal (UCS4-like) format to UCS2. */
1190 #define DEFINE_INIT 0
1191 #define DEFINE_FINI 0
1192 #define MIN_NEEDED_FROM 4
1193 #define MIN_NEEDED_TO 2
1194 #define FROM_DIRECTION 1
1195 #define FROM_LOOP internal_ucs2_loop
1196 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1197 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1198 #define ONE_DIRECTION 1
1200 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1201 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1202 #define LOOPFCT FROM_LOOP
1205 uint32_t val = *((const uint32_t *) inptr); \
1207 if (__glibc_unlikely (val >= 0x10000)) \
1209 UNICODE_TAG_HANDLER (val, 4); \
1210 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1212 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1214 /* Surrogate characters in UCS-4 input are not valid. \
1215 We must catch this, because the UCS-2 output might be \
1216 interpreted as UTF-16 by other programs. If we let \
1217 surrogates pass through, attackers could make a security \
1218 hole exploit by synthesizing any desired plane 1-16 \
1220 result = __GCONV_ILLEGAL_INPUT; \
1221 if (! ignore_errors_p ()) \
1229 put16 (outptr, val); \
1230 outptr += sizeof (uint16_t); \
1234 #define LOOP_NEED_FLAGS
1235 #include <iconv/loop.c>
1236 #include <iconv/skeleton.c>
1239 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1240 #define DEFINE_INIT 0
1241 #define DEFINE_FINI 0
1242 #define MIN_NEEDED_FROM 2
1243 #define MIN_NEEDED_TO 4
1244 #define FROM_DIRECTION 1
1245 #define FROM_LOOP ucs2reverse_internal_loop
1246 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1247 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1248 #define ONE_DIRECTION 1
1250 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1251 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1252 #define LOOPFCT FROM_LOOP
1255 uint16_t u1 = bswap_16 (get16 (inptr)); \
1257 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
1259 /* Surrogate characters in UCS-2 input are not valid. Reject \
1260 them. (Catching this here is not security relevant.) */ \
1261 if (! ignore_errors_p ()) \
1263 result = __GCONV_ILLEGAL_INPUT; \
1271 *((uint32_t *) outptr) = u1; \
1272 outptr += sizeof (uint32_t); \
1275 #define LOOP_NEED_FLAGS
1276 #include <iconv/loop.c>
1277 #include <iconv/skeleton.c>
1280 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1281 #define DEFINE_INIT 0
1282 #define DEFINE_FINI 0
1283 #define MIN_NEEDED_FROM 4
1284 #define MIN_NEEDED_TO 2
1285 #define FROM_DIRECTION 1
1286 #define FROM_LOOP internal_ucs2reverse_loop
1287 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1288 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1289 #define ONE_DIRECTION 1
1291 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1292 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1293 #define LOOPFCT FROM_LOOP
1296 uint32_t val = *((const uint32_t *) inptr); \
1297 if (__glibc_unlikely (val >= 0x10000)) \
1299 UNICODE_TAG_HANDLER (val, 4); \
1300 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1302 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
1304 /* Surrogate characters in UCS-4 input are not valid. \
1305 We must catch this, because the UCS-2 output might be \
1306 interpreted as UTF-16 by other programs. If we let \
1307 surrogates pass through, attackers could make a security \
1308 hole exploit by synthesizing any desired plane 1-16 \
1310 if (! ignore_errors_p ()) \
1312 result = __GCONV_ILLEGAL_INPUT; \
1321 put16 (outptr, bswap_16 (val)); \
1322 outptr += sizeof (uint16_t); \
1326 #define LOOP_NEED_FLAGS
1327 #include <iconv/loop.c>
1328 #include <iconv/skeleton.c>