Support inline syscall with six arguments.
[glibc.git] / iconv / gconv_simple.c
blob35346aa4987efbc742d47ea7abea1c2cde2f1b56
1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \
34 MinT, MaxT) \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 __const unsigned char **, __const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
41 #ifndef EILSEQ
42 # define EILSEQ EINVAL
43 #endif
46 /* Transform from the internal, UCS4-like format, to UCS4. The
47 difference between the internal ucs4 format and the real UCS4
48 format is, if any, the endianness. The Unicode/ISO 10646 standard says that
49 unless some higher protocol specifies it differently, the byte
50 order is big endian.*/
51 #define DEFINE_INIT 0
52 #define DEFINE_FINI 0
53 #define MIN_NEEDED_FROM 4
54 #define MIN_NEEDED_TO 4
55 #define FROM_DIRECTION 1
56 #define FROM_LOOP internal_ucs4_loop
57 #define TO_LOOP internal_ucs4_loop /* This is not used. */
58 #define FUNCTION_NAME __gconv_transform_internal_ucs4
/* Convert complete 4-byte characters from the internal (host byte order)
   format to UCS4, which is big endian.

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output buffer;
   both pointers are advanced past what was converted.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  On little endian hosts the words are accessed
   as uint32_t, so the buffers presumably have to be 4-byte aligned (an
   _unaligned sibling exists for the other case) -- TODO confirm with the
   caller.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of whole characters that fit in both buffers.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* Store and advance separately: incrementing the result of a cast
         is not valid C.  */
      *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of internal_ucs4_loop: converts complete 4-byte
   characters from host byte order to big endian UCS4 without any 32-bit
   memory access.  Advances *INPTRP and *OUTPTRP past the converted data.
   STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT depending on which buffer ran out.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole characters that both buffers can hold.  */
  const size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  size_t left;

  for (left = nchars; left > 0; --left)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is the wire order; copy verbatim.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
/* Convert one character whose bytes may be split between the conversion
   state and the input buffer (internal format to big endian UCS4).

   Bytes already saved in STEP_DATA->__statep (their count is kept in the
   low three bits of __count) are completed from *INPTRP .. INEND.  If
   fewer than four bytes are available in total, the bytes seen so far are
   kept in the state and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise
   the character is written to *OUTPTRP, *OUTPTRP is advanced by four, the
   saved-byte count is cleared and __GCONV_OK is returned.

   OUTEND is not checked here; presumably the caller guarantees room for
   four bytes -- TODO confirm.  STEP and IRREVERSIBLE are not used.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Write byte by byte: the output need not be aligned, and the pointer
     that has to advance is *OUTPTRP, not the local OUTPTRP.  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
193 #include <iconv/skeleton.c>
196 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
197 for the other direction we have to check for correct values here. */
198 #define DEFINE_INIT 0
199 #define DEFINE_FINI 0
200 #define MIN_NEEDED_FROM 4
201 #define MIN_NEEDED_TO 4
202 #define FROM_DIRECTION 1
203 #define FROM_LOOP ucs4_internal_loop
204 #define TO_LOOP ucs4_internal_loop /* This is not used. */
205 #define FUNCTION_NAME __gconv_transform_ucs4_internal
/* Convert complete 4-byte characters from big endian UCS4 to the internal
   (host byte order) format, rejecting values above 0x7fffffff.

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output buffer.
   The words are accessed as uint32_t, so the buffers presumably have to be
   4-byte aligned -- TODO confirm with the caller.  STEP is not used.

   An out-of-range value is handled as follows:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once (the
     buffer pointers are not written back);
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the character
     and count it in *IRREVERSIBLE;
   - otherwise: write back the pointers (input left pointing at the bad
     character) and return __GCONV_ILLEGAL_INPUT.

   Without error the result is __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store and advance separately: incrementing the result of a cast
         is not valid C.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4_internal_loop: converts complete 4-byte
   characters from big endian UCS4 to the internal (host byte order)
   format without any 32-bit memory access.

   A character is rejected when its most significant byte is above 0x7f,
   i.e. when the value exceeds 0x7fffffff, which matches the check in the
   aligned variant.  Error handling:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once (the
     buffer pointers are not written back);
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the character
     and count it in *IRREVERSIBLE;
   - otherwise: write back the pointers and return __GCONV_ILLEGAL_INPUT.

   Without error the result is __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* inptr[0] is the most significant byte; 0x80 and above means the
         value is larger than 0x7fffffff.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
/* Convert one character whose bytes may be split between the conversion
   state and the input buffer (big endian UCS4 to internal format).

   Bytes already saved in STEP_DATA->__statep (count in the low three bits
   of __count) are completed from *INPTRP .. INEND.  With fewer than four
   bytes in total the bytes are kept in the state and
   __GCONV_INCOMPLETE_INPUT is returned.

   A value above 0x7fffffff (most significant byte above 0x7f) is illegal:
   unless __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the input
   bytes consumed by this call are handed back and __GCONV_ILLEGAL_INPUT is
   returned; with the flag set the character is dropped silently.
   Otherwise the character is written to *OUTPTRP, which is advanced by
   four.  On success the saved-byte count is cleared and __GCONV_OK is
   returned.  STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 0 is the most significant one; 0x80 and above means the value
     is larger than 0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input in this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
401 #include <iconv/skeleton.c>
404 /* Similarly for the little endian form. */
405 #define DEFINE_INIT 0
406 #define DEFINE_FINI 0
407 #define MIN_NEEDED_FROM 4
408 #define MIN_NEEDED_TO 4
409 #define FROM_DIRECTION 1
410 #define FROM_LOOP internal_ucs4le_loop
411 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
412 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
/* Convert complete 4-byte characters from the internal (host byte order)
   format to little endian UCS4.

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output buffer;
   both pointers are advanced past what was converted.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  On big endian hosts the words are accessed
   as uint32_t, so the buffers presumably have to be 4-byte aligned there
   -- TODO confirm with the caller.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output space remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of whole characters that fit in both buffers.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* Store and advance separately: incrementing the result of a cast
         is not valid C.  */
      *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of internal_ucs4le_loop: converts complete 4-byte
   characters from host byte order to little endian UCS4 without any
   32-bit memory access.  Advances *INPTRP and *OUTPTRP past the converted
   data.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when the input is used up,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain, and
   __GCONV_FULL_OUTPUT otherwise.  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole characters that both buffers can hold.  */
  const size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  size_t left;

  for (left = nchars; left > 0; --left)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already is the wire order; copy verbatim.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  /* Input is left, so it must be the output that is exhausted.  */
  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
/* Convert one character whose bytes may be split between the conversion
   state and the input buffer (internal format to little endian UCS4).

   Bytes already saved in STEP_DATA->__statep (count in the low three bits
   of __count) are completed from *INPTRP .. INEND.  With fewer than four
   bytes in total the bytes are kept in the state and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written to *OUTPTRP, *OUTPTRP is advanced by four, the saved-byte count
   is cleared and __GCONV_OK is returned.

   OUTEND is not checked here; presumably the caller guarantees room for
   four bytes -- TODO confirm.  STEP and IRREVERSIBLE are not used.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Write byte by byte: the output need not be aligned, and the pointer
     that has to advance is *OUTPTRP, not the local OUTPTRP.  */
#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
548 #include <iconv/skeleton.c>
551 /* And finally from UCS4-LE to the internal encoding. */
552 #define DEFINE_INIT 0
553 #define DEFINE_FINI 0
554 #define MIN_NEEDED_FROM 4
555 #define MIN_NEEDED_TO 4
556 #define FROM_DIRECTION 1
557 #define FROM_LOOP ucs4le_internal_loop
558 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
559 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
/* Convert complete 4-byte characters from little endian UCS4 to the
   internal (host byte order) format, rejecting values above 0x7fffffff.

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output buffer.
   The words are accessed as uint32_t, so the buffers presumably have to be
   4-byte aligned -- TODO confirm with the caller.  STEP is not used.

   An out-of-range value is handled as follows:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once (the
     buffer pointers are not written back);
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the character
     and count it in *IRREVERSIBLE;
   - otherwise: write back the pointers (input left pointing at the bad
     character) and return __GCONV_ILLEGAL_INPUT, as the big endian and
     the unaligned variants do.

   Without error the result is __GCONV_EMPTY_INPUT,
   __GCONV_INCOMPLETE_INPUT or __GCONV_FULL_OUTPUT.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got so the already converted characters
             are not lost.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store and advance separately: incrementing the result of a cast
         is not valid C.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4le_internal_loop: converts complete 4-byte
   characters from little endian UCS4 to the internal (host byte order)
   format without any 32-bit memory access.

   A character is rejected when its most significant byte (the last one)
   is above 0x7f, i.e. when the value exceeds 0x7fffffff, which matches the
   check in the aligned variant.  Error handling:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once (the
     buffer pointers are not written back);
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the character
     and count it in *IRREVERSIBLE;
   - otherwise: write back the pointers and return __GCONV_ILLEGAL_INPUT.

   Without error the result is __GCONV_EMPTY_INPUT,
   __GCONV_INCOMPLETE_INPUT or __GCONV_FULL_OUTPUT.  STEP is not used.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* inptr[3] is the most significant byte; 0x80 and above means the
         value is larger than 0x7fffffff.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
/* Convert one character whose bytes may be split between the conversion
   state and the input buffer (little endian UCS4 to internal format).

   Bytes already saved in STEP_DATA->__statep (count in the low three bits
   of __count) are completed from *INPTRP .. INEND.  With fewer than four
   bytes in total the bytes are kept in the state and
   __GCONV_INCOMPLETE_INPUT is returned.

   A value above 0x7fffffff (most significant byte, the last one, above
   0x7f) is illegal: unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, __GCONV_ILLEGAL_INPUT is returned; with the flag set
   the character is dropped silently.  Otherwise the character is written
   to *OUTPTRP, which is advanced by four.  On success the saved-byte count
   is cleared and __GCONV_OK is returned.  STEP, OUTEND and IRREVERSIBLE
   are not used.

   NOTE(review): unlike ucs4_internal_loop_single, the error path does not
   hand back the input bytes consumed by this call -- confirm whether that
   difference is intended.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 3 is the most significant one; 0x80 and above means the value
     is larger than 0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        return __GCONV_ILLEGAL_INPUT;
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
757 #include <iconv/skeleton.c>
760 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
761 #define DEFINE_INIT 0
762 #define DEFINE_FINI 0
763 #define MIN_NEEDED_FROM 1
764 #define MIN_NEEDED_TO 4
765 #define FROM_DIRECTION 1
766 #define FROM_LOOP ascii_internal_loop
767 #define TO_LOOP ascii_internal_loop /* This is not used. */
768 #define FUNCTION_NAME __gconv_transform_ascii_internal
769 #define ONE_DIRECTION 1
771 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
772 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
773 #define LOOPFCT FROM_LOOP
/* Loop body for ISO 646-IRV to internal: widens one input byte to a
   32-bit character.  Bytes above 0x7f go to the standard error handler.
   Uses inptr and outptr, which are presumably provided by the loop
   template included below -- that template is not visible here.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*inptr > '\x7f', 0))                                \
      {                                                                       \
        /* The value is too large.  We don't try transliteration here since   \
           this is not an error because of the lack of possibilities to       \
           represent the result.  This is a genuine bug in the input since    \
           ASCII does not allow such values.  */                              \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one-byte sequence.  Store and advance separately:           \
           incrementing the result of a cast is not valid C.  */              \
        *((uint32_t *) outptr) = *inptr++;                                    \
        outptr += sizeof (uint32_t);                                          \
      }                                                                       \
  }
788 #define LOOP_NEED_FLAGS
789 #include <iconv/loop.c>
790 #include <iconv/skeleton.c>
793 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
794 #define DEFINE_INIT 0
795 #define DEFINE_FINI 0
796 #define MIN_NEEDED_FROM 4
797 #define MIN_NEEDED_TO 1
798 #define FROM_DIRECTION 1
799 #define FROM_LOOP internal_ascii_loop
800 #define TO_LOOP internal_ascii_loop /* This is not used. */
801 #define FUNCTION_NAME __gconv_transform_internal_ascii
802 #define ONE_DIRECTION 1
804 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
805 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
806 #define LOOPFCT FROM_LOOP
/* Loop body for internal to ISO 646-IRV: narrows one 32-bit character to
   a byte.  Values above 0x7f are first offered to UNICODE_TAG_HANDLER and
   then to the standard error handler.  Uses inptr and outptr, which are
   presumably provided by the loop template included below -- that
   template is not visible here.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))             \
      {                                                                       \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);                 \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one-byte sequence.  Read and advance separately:            \
           incrementing the result of a cast is not valid C.  */              \
        *outptr++ = *((const uint32_t *) inptr);                              \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
  }
818 #define LOOP_NEED_FLAGS
819 #include <iconv/loop.c>
820 #include <iconv/skeleton.c>
823 /* Convert from the internal (UCS4-like) format to UTF-8. */
824 #define DEFINE_INIT 0
825 #define DEFINE_FINI 0
826 #define MIN_NEEDED_FROM 4
827 #define MIN_NEEDED_TO 1
828 #define MAX_NEEDED_TO 6
829 #define FROM_DIRECTION 1
830 #define FROM_LOOP internal_utf8_loop
831 #define TO_LOOP internal_utf8_loop /* This is not used. */
832 #define FUNCTION_NAME __gconv_transform_internal_utf8
833 #define ONE_DIRECTION 1
835 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
836 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
837 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
838 #define LOOPFCT FROM_LOOP
/* Loop body for internal to UTF-8: encodes one 32-bit character as one
   to six bytes.  Values above 0x7fffffff go to the standard error
   handler.  When the encoded form does not fit before outend, result is
   set to __GCONV_FULL_OUTPUT and the enclosing loop is left.  Uses inptr,
   outptr, outend and result, which are presumably provided by the loop
   template included below -- that template is not visible here.  */
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (wc < 0x80)                                                            \
      /* It's a one-byte sequence.  */                                        \
      *outptr++ = (unsigned char) wc;                                         \
    else if (__builtin_expect (wc <= 0x7fffffff, 1))                          \
      {                                                                       \
        size_t step;                                                          \
        unsigned char *start;                                                 \
                                                                              \
        /* Find the sequence length: STEP bytes carry 5 * STEP + 1 bits.  */  \
        for (step = 2; step < 6; ++step)                                      \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)                   \
            break;                                                            \
                                                                              \
        if (__builtin_expect (outptr + step > outend, 0))                     \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        start = outptr;                                                       \
        /* Lead byte marker: STEP leading one bits.  Shifting 0xff00 avoids   \
           right-shifting a negative value.  */                               \
        *outptr = (unsigned char) (0xff00 >> step);                           \
        outptr += step;                                                       \
        --step;                                                               \
        /* Fill the trail bytes from the last one backwards.  */              \
        do                                                                    \
          {                                                                   \
            start[step] = 0x80 | (wc & 0x3f);                                 \
            wc >>= 6;                                                         \
          }                                                                   \
        while (--step > 0);                                                   \
        start[0] |= wc;                                                       \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
881 #define LOOP_NEED_FLAGS
882 #include <iconv/loop.c>
883 #include <iconv/skeleton.c>
886 /* Convert from UTF-8 to the internal (UCS4-like) format. */
887 #define DEFINE_INIT 0
888 #define DEFINE_FINI 0
889 #define MIN_NEEDED_FROM 1
890 #define MAX_NEEDED_FROM 6
891 #define MIN_NEEDED_TO 4
892 #define FROM_DIRECTION 1
893 #define FROM_LOOP utf8_internal_loop
894 #define TO_LOOP utf8_internal_loop /* This is not used. */
895 #define FUNCTION_NAME __gconv_transform_utf8_internal
896 #define ONE_DIRECTION 1
898 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
899 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
900 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
901 #define LOOPFCT FROM_LOOP
/* Loop body for UTF-8 to internal: decodes one sequence of one to six
   bytes into a 32-bit character.  Invalid lead bytes, bad trail bytes and
   over-long encodings go to the standard error handler.  A sequence that
   is valid so far but cut off by inend sets result to
   __GCONV_INCOMPLETE_INPUT and leaves the enclosing loop.  Uses inptr,
   inend, outptr and result, which are presumably provided by the loop
   template included below -- that template is not visible here.  */
#define BODY \
  {                                                                           \
    uint32_t ch;                                                              \
    uint_fast32_t cnt;                                                        \
    uint_fast32_t i;                                                          \
                                                                              \
    /* Next input byte.  */                                                   \
    ch = *inptr;                                                              \
                                                                              \
    if (ch < 0x80)                                                            \
      {                                                                       \
        /* One byte sequence.  */                                             \
        cnt = 1;                                                              \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        if (ch >= 0xc2 && ch < 0xe0)                                          \
          {                                                                   \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
               otherwise the wide character could have been represented      \
               using a single byte.  */                                       \
            cnt = 2;                                                          \
            ch &= 0x1f;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))                   \
          {                                                                   \
            /* We expect three bytes.  */                                     \
            cnt = 3;                                                          \
            ch &= 0x0f;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))                   \
          {                                                                   \
            /* We expect four bytes.  */                                      \
            cnt = 4;                                                          \
            ch &= 0x07;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))                   \
          {                                                                   \
            /* We expect five bytes.  */                                      \
            cnt = 5;                                                          \
            ch &= 0x03;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))                   \
          {                                                                   \
            /* We expect six bytes.  */                                       \
            cnt = 6;                                                          \
            ch &= 0x01;                                                       \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            int skipped;                                                      \
                                                                              \
            /* Search the end of this ill-formed UTF-8 character.  This       \
               is the next byte with (x & 0xc0) != 0x80.  */                  \
            skipped = 0;                                                      \
            do                                                                \
              ++skipped;                                                      \
            while (inptr + skipped < inend                                    \
                   && (*(inptr + skipped) & 0xc0) == 0x80                     \
                   && skipped < 5);                                           \
                                                                              \
            STANDARD_FROM_LOOP_ERR_HANDLER (skipped);                         \
          }                                                                   \
                                                                              \
        if (__builtin_expect (inptr + cnt > inend, 0))                        \
          {                                                                   \
            /* We don't have enough input.  But before we report that check   \
               that all the bytes are correct.  */                            \
            for (i = 1; inptr + i < inend; ++i)                               \
              if ((inptr[i] & 0xc0) != 0x80)                                  \
                break;                                                        \
                                                                              \
            if (__builtin_expect (inptr + i == inend, 1))                     \
              {                                                               \
                result = __GCONV_INCOMPLETE_INPUT;                            \
                break;                                                        \
              }                                                               \
                                                                              \
            STANDARD_FROM_LOOP_ERR_HANDLER (i);                               \
          }                                                                   \
                                                                              \
        /* Read the possible remaining bytes.  */                             \
        for (i = 1; i < cnt; ++i)                                             \
          {                                                                   \
            uint32_t byte = inptr[i];                                         \
                                                                              \
            if ((byte & 0xc0) != 0x80)                                        \
              /* This is an illegal encoding.  */                             \
              break;                                                          \
                                                                              \
            ch <<= 6;                                                         \
            ch |= byte & 0x3f;                                                \
          }                                                                   \
                                                                              \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.               \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could       \
           have been represented with fewer than cnt bytes.  */               \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))               \
          {                                                                   \
            /* This is an illegal encoding.  */                               \
            STANDARD_FROM_LOOP_ERR_HANDLER (i);                               \
          }                                                                   \
                                                                              \
        inptr += cnt;                                                         \
      }                                                                       \
                                                                              \
    /* Now adjust the pointers and store the result.  Store and advance       \
       separately: incrementing the result of a cast is not valid C.  */      \
    *((uint32_t *) outptr) = ch;                                              \
    outptr += sizeof (uint32_t);                                              \
  }
1012 #define LOOP_NEED_FLAGS
/* Save an incomplete UTF-8 sequence (*inptrp .. inend) in the conversion
   state.  The byte count goes to state->__count; the payload bits seen so
   far are stored in state->__value.__wch, already shifted to the position
   they will have once the missing trail bytes arrive.  *inptrp is
   advanced to inend.  Uses state, inptrp and inend from the surrounding
   template code, which is not visible here.  */
#define STORE_REST \
  {                                                                           \
    /* We can assume that the first byte in the buffer is correct and that    \
       it requires a larger number of bytes than there are in the input      \
       buffer.  */                                                            \
    wint_t value = **inptrp;                                                  \
    /* Trail bytes still outstanding; the lead byte itself is already         \
       accounted for.  */                                                     \
    size_t missing;                                                           \
                                                                              \
    state->__count = inend - *inptrp;                                         \
                                                                              \
    if (value >= 0xc2 && value < 0xe0)                                        \
      {                                                                       \
        /* Two byte sequence.  The first byte cannot be 0xc0 or 0xc1,         \
           otherwise the wide character could have been represented using    \
           a single byte.  */                                                 \
        missing = 1;                                                          \
        value &= 0x1f;                                                        \
      }                                                                       \
    else if (__builtin_expect ((value & 0xf0) == 0xe0, 1))                    \
      {                                                                       \
        /* Three byte sequence.  */                                           \
        missing = 2;                                                          \
        value &= 0x0f;                                                        \
      }                                                                       \
    else if (__builtin_expect ((value & 0xf8) == 0xf0, 1))                    \
      {                                                                       \
        /* Four byte sequence.  */                                            \
        missing = 3;                                                          \
        value &= 0x07;                                                        \
      }                                                                       \
    else if (__builtin_expect ((value & 0xfc) == 0xf8, 1))                    \
      {                                                                       \
        /* Five byte sequence.  */                                            \
        missing = 4;                                                          \
        value &= 0x03;                                                        \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Six byte sequence.  */                                             \
        missing = 5;                                                          \
        value &= 0x01;                                                        \
      }                                                                       \
                                                                              \
    /* Fold in the trail bytes that are present.  */                          \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp))                           \
      {                                                                       \
        value <<= 6;                                                          \
        value |= **inptrp & 0x3f;                                             \
        --missing;                                                            \
      }                                                                       \
                                                                              \
    /* Make room for the so far missing bytes and store the value.  */        \
    value <<= missing * 6;                                                    \
    state->__value.__wch = value;                                             \
  }
/* Rebuild in bytebuf the UTF-8 bytes of a partial sequence previously
   saved in the conversion state.  inlen is set to state->__count; the
   total sequence length is derived from the magnitude of the stored
   value, and only the first inlen bytes of bytebuf are written.  Uses
   state, inlen and bytebuf from the surrounding template code, which is
   not visible here.

   NOTE(review): because the length is inferred from the value, a saved
   lead byte whose payload bits are all zero (for example a lone 0xe0,
   which STORE_REST stores as value 0) is reconstructed as a shorter
   sequence -- confirm whether the template can reach that case.  */
#define UNPACK_BYTES \
  {                                                                           \
    wint_t wch = state->__value.__wch;                                        \
    size_t ntotal;                                                            \
    inlen = state->__count;                                                   \
                                                                              \
    /* The limits are the largest values of the 2, 3, 4 and 5 byte forms      \
       (11, 16, 21 and 26 payload bits), all inclusive.  */                   \
    if (state->__value.__wch <= 0x7ff)                                        \
      {                                                                       \
        bytebuf[0] = 0xc0;                                                    \
        ntotal = 2;                                                           \
      }                                                                       \
    else if (__builtin_expect (state->__value.__wch <= 0xffff, 1))            \
      {                                                                       \
        bytebuf[0] = 0xe0;                                                    \
        ntotal = 3;                                                           \
      }                                                                       \
    else if (__builtin_expect (state->__value.__wch <= 0x1fffff, 1))          \
      {                                                                       \
        bytebuf[0] = 0xf0;                                                    \
        ntotal = 4;                                                           \
      }                                                                       \
    else if (__builtin_expect (state->__value.__wch <= 0x3ffffff, 1))         \
      {                                                                       \
        bytebuf[0] = 0xf8;                                                    \
        ntotal = 5;                                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        bytebuf[0] = 0xfc;                                                    \
        ntotal = 6;                                                           \
      }                                                                       \
                                                                              \
    /* Walk the trail byte positions from the last one down to 1, peeling     \
       six bits per position; only positions below inlen are stored.  */      \
    do                                                                        \
      {                                                                       \
        if (--ntotal < inlen)                                                 \
          bytebuf[ntotal] = 0x80 | (wch & 0x3f);                              \
        wch >>= 6;                                                            \
      }                                                                       \
    while (ntotal > 1);                                                       \
                                                                              \
    /* What is left are the payload bits of the lead byte.  */                \
    bytebuf[0] |= wch;                                                        \
  }
1117 #include <iconv/loop.c>
1118 #include <iconv/skeleton.c>
1121 /* Convert from UCS2 to the internal (UCS4-like) format. */
1122 #define DEFINE_INIT 0
1123 #define DEFINE_FINI 0
1124 #define MIN_NEEDED_FROM 2
1125 #define MIN_NEEDED_TO 4
1126 #define FROM_DIRECTION 1
1127 #define FROM_LOOP ucs2_internal_loop
1128 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1129 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1130 #define ONE_DIRECTION 1
1132 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1133 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1134 #define LOOPFCT FROM_LOOP
/* Loop body for UCS2 to internal: widens one 16-bit unit (read in host
   byte order) to a 32-bit character.  Surrogate values 0xd800..0xdfff go
   to the standard error handler.  Uses inptr and outptr, which are
   presumably provided by the loop template included below -- that
   template is not visible here.  */
#define BODY \
  {                                                                           \
    uint16_t u1 = *((const uint16_t *) inptr);                                \
                                                                              \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                    \
      {                                                                       \
        /* Surrogate characters in UCS-2 input are not valid.  Reject         \
           them.  (Catching this here is not security relevant.)  */          \
        STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
      }                                                                       \
                                                                              \
    /* Store and advance separately: incrementing the result of a cast        \
       is not valid C.  */                                                    \
    *((uint32_t *) outptr) = u1;                                              \
    outptr += sizeof (uint32_t);                                              \
    inptr += 2;                                                               \
  }
1149 #define LOOP_NEED_FLAGS
1150 #include <iconv/loop.c>
1151 #include <iconv/skeleton.c>
1154 /* Convert from the internal (UCS4-like) format to UCS2. */
1155 #define DEFINE_INIT 0
1156 #define DEFINE_FINI 0
1157 #define MIN_NEEDED_FROM 4
1158 #define MIN_NEEDED_TO 2
1159 #define FROM_DIRECTION 1
1160 #define FROM_LOOP internal_ucs2_loop
1161 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1162 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1163 #define ONE_DIRECTION 1
1165 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1166 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1167 #define LOOPFCT FROM_LOOP
/* Loop body for internal to UCS2: narrows one 32-bit character to a
   16-bit unit in host byte order.  Values of 0x10000 and above are first
   offered to UNICODE_TAG_HANDLER and then to the standard error handler.
   Surrogate values set result to __GCONV_ILLEGAL_INPUT and are either
   skipped and counted in *irreversible (when ignore_errors_p () is true)
   or end the enclosing loop.  Uses inptr, outptr, result and irreversible,
   which are presumably provided by the loop template included below --
   that template is not visible here.  */
#define BODY \
  {                                                                           \
    uint32_t val = *((const uint32_t *) inptr);                               \
                                                                              \
    if (__builtin_expect (val >= 0x10000, 0))                                 \
      {                                                                       \
        UNICODE_TAG_HANDLER (val, 4);                                         \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))             \
      {                                                                       \
        /* Surrogate characters in UCS-4 input are not valid.                 \
           We must catch this, because the UCS-2 output might be             \
           interpreted as UTF-16 by other programs.  If we let               \
           surrogates pass through, attackers could make a security          \
           hole exploit by synthesizing any desired plane 1-16               \
           character.  */                                                     \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
          break;                                                              \
        inptr += 4;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Store and advance separately: incrementing the result of a cast    \
           is not valid C.  */                                                \
        *((uint16_t *) outptr) = val;                                         \
        outptr += sizeof (uint16_t);                                          \
        inptr += 4;                                                           \
      }                                                                       \
  }
1198 #define LOOP_NEED_FLAGS
1199 #include <iconv/loop.c>
1200 #include <iconv/skeleton.c>
1203 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1204 #define DEFINE_INIT 0
1205 #define DEFINE_FINI 0
1206 #define MIN_NEEDED_FROM 2
1207 #define MIN_NEEDED_TO 4
1208 #define FROM_DIRECTION 1
1209 #define FROM_LOOP ucs2reverse_internal_loop
1210 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1211 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1212 #define ONE_DIRECTION 1
1214 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1215 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1216 #define LOOPFCT FROM_LOOP
/* Loop body for byte-swapped UCS2 to internal: swaps one 16-bit unit and
   widens it to a 32-bit character.  Surrogate values 0xd800..0xdfff are
   either skipped and counted in *irreversible (when ignore_errors_p () is
   true) or set result to __GCONV_ILLEGAL_INPUT and end the enclosing
   loop.  Uses inptr, outptr, result and irreversible, which are
   presumably provided by the loop template included below -- that
   template is not visible here.  */
#define BODY \
  {                                                                           \
    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr));                     \
                                                                              \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                    \
      {                                                                       \
        /* Surrogate characters in UCS-2 input are not valid.  Reject         \
           them.  (Catching this here is not security relevant.)  */          \
        if (! ignore_errors_p ())                                             \
          {                                                                   \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
        inptr += 2;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
                                                                              \
    /* Store and advance separately: incrementing the result of a cast        \
       is not valid C.  */                                                    \
    *((uint32_t *) outptr) = u1;                                              \
    outptr += sizeof (uint32_t);                                              \
    inptr += 2;                                                               \
  }
1238 #define LOOP_NEED_FLAGS
1239 #include <iconv/loop.c>
1240 #include <iconv/skeleton.c>
1243 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1244 #define DEFINE_INIT 0
1245 #define DEFINE_FINI 0
1246 #define MIN_NEEDED_FROM 4
1247 #define MIN_NEEDED_TO 2
1248 #define FROM_DIRECTION 1
1249 #define FROM_LOOP internal_ucs2reverse_loop
1250 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1251 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1252 #define ONE_DIRECTION 1
1254 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1255 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1256 #define LOOPFCT FROM_LOOP
/* Loop body for internal to byte-swapped UCS2: narrows one 32-bit
   character to a 16-bit unit and swaps its bytes.  Values of 0x10000 and
   above are first offered to UNICODE_TAG_HANDLER and then to the standard
   error handler.  Surrogate values are either skipped and counted in
   *irreversible (when ignore_errors_p () is true) or set result to
   __GCONV_ILLEGAL_INPUT and end the enclosing loop.  Uses inptr, outptr,
   result and irreversible, which are presumably provided by the loop
   template included below -- that template is not visible here.  */
#define BODY \
  {                                                                           \
    uint32_t val = *((const uint32_t *) inptr);                               \
    if (__builtin_expect (val >= 0x10000, 0))                                 \
      {                                                                       \
        UNICODE_TAG_HANDLER (val, 4);                                         \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))             \
      {                                                                       \
        /* Surrogate characters in UCS-4 input are not valid.                 \
           We must catch this, because the UCS-2 output might be             \
           interpreted as UTF-16 by other programs.  If we let               \
           surrogates pass through, attackers could make a security          \
           hole exploit by synthesizing any desired plane 1-16               \
           character.  */                                                     \
        if (! ignore_errors_p ())                                             \
          {                                                                   \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
        inptr += 4;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Store and advance separately: incrementing the result of a cast    \
           is not valid C.  */                                                \
        *((uint16_t *) outptr) = bswap_16 (val);                              \
        outptr += sizeof (uint16_t);                                          \
        inptr += 4;                                                           \
      }                                                                       \
  }
1288 #define LOOP_NEED_FLAGS
1289 #include <iconv/loop.c>
1290 #include <iconv/skeleton.c>