Update.
[glibc.git] / iconv / gconv_simple.c
blob4010a6b3267df9727d67fcb9207a54036380ea6b
1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <byteswap.h>
22 #include <endian.h>
23 #include <errno.h>
24 #include <gconv.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <sys/param.h>
31 #ifndef EILSEQ
32 # define EILSEQ EINVAL
33 #endif
/* Tables used by the UTF-8 conversions below.

   encoding_mask[k] has every bit set that does NOT fit into a UTF-8
   sequence of k + 2 bytes; a character WC fits if
   (WC & encoding_mask[k]) == 0.  The constants are written as unsigned
   values so that no negative int has to be converted to uint32_t.

   encoding_byte[k] is the lead-byte marker of a sequence of k + 2
   bytes.  */
static const uint32_t encoding_mask[] =
{
  0xfffff800u, 0xffff0000u, 0xffe00000u, 0xfc000000u
};

static const unsigned char encoding_byte[] =
{
  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4


/* Convert as many complete 4-byte characters as both buffers allow
   from the host-order internal format to big-endian UCS4.

   *INPTRP and *OUTPTRP are advanced past the converted data.  Returns
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise (a partial character is left in
   the input).  STATE, DATA and CONVERTED are not used.

   The bytes are moved with memcpy and shifts, so the code depends
   neither on the host byte order nor on the alignment of the
   buffers.  */
static inline int
internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t wc;

      memcpy (&wc, inptr, sizeof (wc));
      outptr[0] = (unsigned char) (wc >> 24);
      outptr[1] = (unsigned char) (wc >> 16);
      outptr[2] = (unsigned char) (wc >> 8);
      outptr[3] = (unsigned char) wc;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another complete character.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop for buffers that need not be aligned
   for 32-bit accesses.  Converts complete 4-byte characters from the
   host-order internal format to big-endian UCS4 one byte at a time.

   *INPTRP and *OUTPTRP are advanced past the converted data.  The
   status is determined in the same order as in internal_ucs4_loop:
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four output bytes remain, __GCONV_INCOMPLETE_INPUT
   otherwise.  STATE, DATA and CONVERTED are not used.  */
static inline int
internal_ucs4_loop_unaligned (const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t wc;

      /* memcpy makes no assumption about the alignment of INPTR.  */
      memcpy (&wc, inptr, sizeof (wc));
      outptr[0] = (unsigned char) (wc >> 24);
      outptr[1] = (unsigned char) (wc >> 16);
      outptr[2] = (unsigned char) (wc >> 8);
      outptr[3] = (unsigned char) wc;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
/* Finish a single character whose first bytes were saved in STATE by
   an earlier call (internal format to UCS4).

   The low three bits of STATE->__count hold the number of bytes
   already collected in STATE->__value.__wchb.  Input bytes are
   appended until four are available.  If the input runs out first the
   new count is stored and __GCONV_INCOMPLETE_INPUT is returned.
   Otherwise the character is written in big-endian order, *OUTPTRP is
   advanced by four bytes, the count is cleared and __GCONV_OK is
   returned.  DATA and CONVERTED are not used.

   NOTE(review): OUTEND is not consulted; presumably the caller
   guarantees room for four output bytes -- confirm against
   skeleton.c.  */
static inline int
internal_ucs4_loop_single (const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;
  uint32_t wc;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Remember how many we have.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The collected bytes form a host-order value.  */
  memcpy (&wc, state->__value.__wchb, sizeof (wc));
  (*outptrp)[0] = (unsigned char) (wc >> 24);
  (*outptrp)[1] = (unsigned char) (wc >> 16);
  (*outptrp)[2] = (unsigned char) (wc >> 8);
  (*outptrp)[3] = (unsigned char) wc;
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
187 #include <iconv/skeleton.c>
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal


/* Convert as many complete big-endian UCS4 characters as both buffers
   allow to the host-order internal format.

   A value above 0x7fffffff stops the conversion: *INPTRP is left
   pointing at the offending character, *OUTPTRP behind the last
   character written, and __GCONV_ILLEGAL_INPUT is returned.  Otherwise
   the result is __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT when a partial character is left.  STATE,
   DATA and CONVERTED are not used.

   The validated value itself is stored, so the result is correct on
   hosts of either byte order.  */
static inline int
ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t inval = ((uint32_t) inptr[0] << 24)
                       | ((uint32_t) inptr[1] << 16)
                       | ((uint32_t) inptr[2] << 8)
                       | (uint32_t) inptr[3];

      if (inval > 0x7fffffff)
        {
          /* The value is too large.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      memcpy (outptr, &inval, sizeof (inval));
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop for buffers that need not be aligned
   for 32-bit accesses.  Converts complete big-endian UCS4 characters
   to the host-order internal format.

   A character whose most significant byte exceeds 0x7f represents a
   value above 0x7fffffff and is rejected exactly as in the aligned
   loop: the pointers are stored back and __GCONV_ILLEGAL_INPUT is
   returned.  Otherwise the result is __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT (fewer than four output bytes remain) or
   __GCONV_INCOMPLETE_INPUT.  STATE, DATA and CONVERTED are not
   used.  */
static inline int
ucs4_internal_loop_unaligned (const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t inval;

      if (inptr[0] > 0x7f)
        {
          /* The value is too large.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      inval = ((uint32_t) inptr[0] << 24)
              | ((uint32_t) inptr[1] << 16)
              | ((uint32_t) inptr[2] << 8)
              | (uint32_t) inptr[3];

      /* memcpy makes no assumption about the alignment of OUTPTR.  */
      memcpy (outptr, &inval, sizeof (inval));
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
/* Finish a single character whose first bytes were saved in STATE by
   an earlier call (UCS4 to internal format).

   The low three bits of STATE->__count hold the number of bytes
   already collected in STATE->__value.__wchb.  Input bytes are
   appended until four are available; if the input runs out first the
   new count is stored and __GCONV_INCOMPLETE_INPUT is returned.  A
   character whose most significant byte exceeds 0x7f (value above
   0x7fffffff) yields __GCONV_ILLEGAL_INPUT with STATE->__count left
   untouched.  Otherwise the value is written in host order, *OUTPTRP
   is advanced by four bytes, the count is cleared and __GCONV_OK is
   returned.  DATA and CONVERTED are not used.

   NOTE(review): OUTEND is not consulted; presumably the caller
   guarantees room for four output bytes -- confirm against
   skeleton.c.  */
static inline int
ucs4_internal_loop_single (const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;
  const unsigned char *bytes;
  uint32_t wc;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Remember how many we have.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  bytes = (const unsigned char *) state->__value.__wchb;

  if (bytes[0] > 0x7f)
    /* The value is too large.  */
    return __GCONV_ILLEGAL_INPUT;

  /* The collected bytes are in big-endian order.  */
  wc = ((uint32_t) bytes[0] << 24) | ((uint32_t) bytes[1] << 16)
       | ((uint32_t) bytes[2] << 8) | (uint32_t) bytes[3];
  memcpy (*outptrp, &wc, sizeof (wc));
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
353 #include <iconv/skeleton.c>
/* Similarly for the little endian form: transform from the internal,
   UCS4-like format to UCS4 in little-endian byte order.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le


/* Convert as many complete 4-byte characters as both buffers allow
   from the host-order internal format to little-endian UCS4.

   *INPTRP and *OUTPTRP are advanced past the converted data.  Returns
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STATE, DATA and CONVERTED are
   not used.  */
static inline int
internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t wc;

      memcpy (&wc, inptr, sizeof (wc));
      outptr[0] = (unsigned char) wc;
      outptr[1] = (unsigned char) (wc >> 8);
      outptr[2] = (unsigned char) (wc >> 16);
      outptr[3] = (unsigned char) (wc >> 24);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop for buffers that need not be
   aligned for 32-bit accesses.  Converts complete 4-byte characters
   from the host-order internal format to little-endian UCS4.

   Both the input and the output pointer are advanced for every
   character converted.  Returns __GCONV_EMPTY_INPUT when all input was
   consumed, __GCONV_FULL_OUTPUT when fewer than four output bytes
   remain, and __GCONV_INCOMPLETE_INPUT otherwise.  STATE, DATA and
   CONVERTED are not used.  */
static inline int
internal_ucs4le_loop_unaligned (const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                mbstate_t *state, void *data,
                                size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t wc;

      /* memcpy makes no assumption about the alignment of INPTR.  */
      memcpy (&wc, inptr, sizeof (wc));
      outptr[0] = (unsigned char) wc;
      outptr[1] = (unsigned char) (wc >> 8);
      outptr[2] = (unsigned char) (wc >> 16);
      outptr[3] = (unsigned char) (wc >> 24);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
/* Finish a single character whose first bytes were saved in STATE by
   an earlier call (internal format to little-endian UCS4).

   The low three bits of STATE->__count hold the number of bytes
   already collected in STATE->__value.__wchb.  Input bytes are
   appended until four are available; if the input runs out first the
   new count is stored and __GCONV_INCOMPLETE_INPUT is returned.
   Otherwise the character is written in little-endian order, *OUTPTRP
   is advanced by four bytes, the count is cleared and __GCONV_OK is
   returned.  DATA and CONVERTED are not used.

   NOTE(review): OUTEND is not consulted; presumably the caller
   guarantees room for four output bytes -- confirm against
   skeleton.c.  */
static inline int
internal_ucs4le_loop_single (const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;
  uint32_t wc;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Remember how many we have.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The collected bytes form a host-order value.  */
  memcpy (&wc, state->__value.__wchb, sizeof (wc));
  (*outptrp)[0] = (unsigned char) wc;
  (*outptrp)[1] = (unsigned char) (wc >> 8);
  (*outptrp)[2] = (unsigned char) (wc >> 16);
  (*outptrp)[3] = (unsigned char) (wc >> 24);
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
489 #include <iconv/skeleton.c>
/* And finally from UCS4-LE to the internal encoding.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal


/* Convert as many complete little-endian UCS4 characters as both
   buffers allow to the host-order internal format.

   A value above 0x7fffffff stops the conversion: *INPTRP is left
   pointing at the offending character, *OUTPTRP behind the last
   character written, and __GCONV_ILLEGAL_INPUT is returned (as in
   ucs4_internal_loop).  Otherwise the result is __GCONV_EMPTY_INPUT
   when all input was consumed, __GCONV_FULL_OUTPUT when fewer than
   four output bytes remain, and __GCONV_INCOMPLETE_INPUT when a
   partial character is left.  STATE, DATA and CONVERTED are not used.

   The validated value itself is stored, so the result is correct on
   hosts of either byte order.  */
static inline int
ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t inval = (uint32_t) inptr[0]
                       | ((uint32_t) inptr[1] << 8)
                       | ((uint32_t) inptr[2] << 16)
                       | ((uint32_t) inptr[3] << 24);

      if (inval > 0x7fffffff)
        {
          /* The value is too large.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      memcpy (outptr, &inval, sizeof (inval));
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop for buffers that need not be
   aligned for 32-bit accesses.  Converts complete little-endian UCS4
   characters to the host-order internal format.

   A character whose most significant byte (the fourth one) exceeds
   0x7f represents a value above 0x7fffffff and is rejected: the
   pointers are stored back and __GCONV_ILLEGAL_INPUT is returned.
   Both pointers are advanced for every character converted.
   Otherwise the result is __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT
   (fewer than four output bytes remain) or __GCONV_INCOMPLETE_INPUT.
   STATE, DATA and CONVERTED are not used.  */
static inline int
ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                mbstate_t *state, void *data,
                                size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t in_avail = (size_t) (inend - inptr);
  size_t out_avail = (size_t) (outend - outptr);
  size_t n_convert = (in_avail < out_avail ? in_avail : out_avail) / 4;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t inval;

      if (inptr[3] > 0x7f)
        {
          /* The value is too large.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      inval = (uint32_t) inptr[0]
              | ((uint32_t) inptr[1] << 8)
              | ((uint32_t) inptr[2] << 16)
              | ((uint32_t) inptr[3] << 24);

      /* memcpy makes no assumption about the alignment of OUTPTR.  */
      memcpy (outptr, &inval, sizeof (inval));
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (inptr == inend)
    return __GCONV_EMPTY_INPUT;
  if (outend - outptr < 4)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
/* Finish a single character whose first bytes were saved in STATE by
   an earlier call (little-endian UCS4 to internal format).

   The low three bits of STATE->__count hold the number of bytes
   already collected in STATE->__value.__wchb.  Input bytes are
   appended until four are available; if the input runs out first the
   new count is stored and __GCONV_INCOMPLETE_INPUT is returned.  A
   character whose most significant byte (the fourth one) exceeds 0x7f
   yields __GCONV_ILLEGAL_INPUT with STATE->__count left untouched.
   Otherwise the value is written in host order on hosts of either
   byte order, *OUTPTRP is advanced by four bytes, the count is cleared
   and __GCONV_OK is returned.  DATA and CONVERTED are not used.

   NOTE(review): OUTEND is not consulted; presumably the caller
   guarantees room for four output bytes -- confirm against
   skeleton.c.  */
static inline int
ucs4le_internal_loop_single (const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;
  const unsigned char *bytes;
  uint32_t wc;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Remember how many we have.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  bytes = (const unsigned char *) state->__value.__wchb;

  if (bytes[3] > 0x7f)
    /* The value is too large.  */
    return __GCONV_ILLEGAL_INPUT;

  /* The collected bytes are in little-endian order.  */
  wc = (uint32_t) bytes[0] | ((uint32_t) bytes[1] << 8)
       | ((uint32_t) bytes[2] << 16) | ((uint32_t) bytes[3] << 24);
  memcpy (*outptrp, &wc, sizeof (wc));
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
640 #include <iconv/skeleton.c>
/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ascii_internal_loop
#define TO_LOOP                 ascii_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ascii_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: reject bytes above 0x7f, otherwise widen the  \
   byte to a 32-bit value.  The store goes through memcpy so OUTPTR   \
   need not be aligned.  */
#define BODY \
  {                                                                           \
    uint32_t ascii_wc__;                                                      \
                                                                              \
    if (*inptr > 0x7f)                                                        \
      {                                                                       \
        /* This is no correct ANSI_X3.4-1968 character.  */                   \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
                                                                              \
    /* It's an one byte sequence.  */                                         \
    ascii_wc__ = *inptr++;                                                    \
    memcpy (outptr, &ascii_wc__, sizeof (ascii_wc__));                        \
    outptr += sizeof (ascii_wc__);                                            \
  }
670 #include <iconv/loop.c>
671 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ascii_loop
#define TO_LOOP                 internal_ascii_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ascii
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: read a 32-bit character, reject values above  \
   0x7f, otherwise emit it as a single byte.  The load goes through   \
   memcpy so INPTR need not be aligned.  */
#define BODY \
  {                                                                           \
    uint32_t ascii_wc__;                                                      \
                                                                              \
    memcpy (&ascii_wc__, inptr, sizeof (ascii_wc__));                         \
    if (ascii_wc__ > 0x7f)                                                    \
      {                                                                       \
        /* This is no correct ANSI_X3.4-1968 character.  */                   \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
                                                                              \
    /* It's an one byte sequence.  */                                         \
    *outptr++ = (unsigned char) ascii_wc__;                                   \
    inptr += sizeof (ascii_wc__);                                             \
  }
700 #include <iconv/loop.c>
701 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UTF-8.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define MAX_NEEDED_TO           6
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_utf8_loop
#define TO_LOOP                 internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_utf8
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: read a 32-bit character and emit it as a      \
   UTF-8 sequence of one to six bytes.  If a multi-byte sequence does \
   not fit before OUTEND, RESULT is set to __GCONV_FULL_OUTPUT and    \
   nothing is consumed.  */
#define BODY \
  {                                                                           \
    uint32_t wc;                                                              \
                                                                              \
    /* memcpy makes no assumption about the alignment of INPTR.  */           \
    memcpy (&wc, inptr, sizeof (wc));                                         \
                                                                              \
    /* Since we control every character we read this cannot happen.  */      \
    assert (wc <= 0x7fffffff);                                                \
                                                                              \
    if (wc < 0x80)                                                            \
      /* It's an one byte sequence.  */                                       \
      *outptr++ = (unsigned char) wc;                                         \
    else                                                                      \
      {                                                                       \
        size_t step;                                                          \
        unsigned char *start;                                                 \
                                                                              \
        /* Find the shortest sequence length that can hold WC.  */            \
        for (step = 2; step < 6; ++step)                                      \
          if ((wc & encoding_mask[step - 2]) == 0)                            \
            break;                                                            \
                                                                              \
        if ((size_t) (outend - outptr) < step)                                \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        start = outptr;                                                       \
        *outptr = encoding_byte[step - 2];                                    \
        outptr += step;                                                       \
        --step;                                                               \
        /* Fill in the continuation bytes from the last to the second.  */    \
        do                                                                    \
          {                                                                   \
            start[step] = 0x80 | (wc & 0x3f);                                 \
            wc >>= 6;                                                         \
          }                                                                   \
        while (--step > 0);                                                   \
        /* The remaining high bits go into the lead byte.  */                 \
        start[0] |= wc;                                                       \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
761 #include <iconv/loop.c>
762 #include <iconv/skeleton.c>
/* Convert from UTF-8 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: decode one UTF-8 sequence of up to six bytes  \
   and store the character as a 32-bit value.  Invalid lead bytes,    \
   invalid trail bytes and non-shortest forms set RESULT to           \
   __GCONV_ILLEGAL_INPUT; a valid but truncated sequence sets it to   \
   __GCONV_INCOMPLETE_INPUT.  */
#define BODY \
  {                                                                           \
    uint32_t ch;                                                              \
    uint_fast32_t cnt;                                                        \
    uint_fast32_t i;                                                          \
                                                                              \
    /* Next input byte.  */                                                   \
    ch = *inptr;                                                              \
                                                                              \
    if (ch < 0x80)                                                            \
      {                                                                       \
        /* One byte sequence.  */                                             \
        cnt = 1;                                                              \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        if (ch >= 0xc2 && ch < 0xe0)                                          \
          {                                                                   \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
               otherwise the wide character could have been represented       \
               using a single byte.  */                                       \
            cnt = 2;                                                          \
            ch &= 0x1f;                                                       \
          }                                                                   \
        else if ((ch & 0xf0) == 0xe0)                                         \
          {                                                                   \
            /* We expect three bytes.  */                                     \
            cnt = 3;                                                          \
            ch &= 0x0f;                                                       \
          }                                                                   \
        else if ((ch & 0xf8) == 0xf0)                                         \
          {                                                                   \
            /* We expect four bytes.  */                                      \
            cnt = 4;                                                          \
            ch &= 0x07;                                                       \
          }                                                                   \
        else if ((ch & 0xfc) == 0xf8)                                         \
          {                                                                   \
            /* We expect five bytes.  */                                      \
            cnt = 5;                                                          \
            ch &= 0x03;                                                       \
          }                                                                   \
        else if ((ch & 0xfe) == 0xfc)                                         \
          {                                                                   \
            /* We expect six bytes.  */                                       \
            cnt = 6;                                                          \
            ch &= 0x01;                                                       \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* This is an illegal encoding.  */                               \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
                                                                              \
        if (NEED_LENGTH_TEST && inptr + cnt > inend)                          \
          {                                                                   \
            /* We don't have enough input.  But before we report that check   \
               that all the bytes are correct.  */                            \
            for (i = 1; inptr + i < inend; ++i)                               \
              if ((inptr[i] & 0xc0) != 0x80)                                  \
                break;                                                        \
            result = (inptr + i == inend                                      \
                      ? __GCONV_INCOMPLETE_INPUT : __GCONV_ILLEGAL_INPUT);    \
            break;                                                            \
          }                                                                   \
                                                                              \
        /* Read the possible remaining bytes.  */                             \
        for (i = 1; i < cnt; ++i)                                             \
          {                                                                   \
            uint32_t byte = inptr[i];                                         \
                                                                              \
            if ((byte & 0xc0) != 0x80)                                        \
              /* This is an illegal encoding.  */                             \
              break;                                                          \
                                                                              \
            ch <<= 6;                                                         \
            ch |= byte & 0x3f;                                                \
          }                                                                   \
                                                                              \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.               \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could       \
           have been represented with fewer than cnt bytes.  */               \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))               \
          {                                                                   \
            /* This is an illegal encoding.  */                               \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
                                                                              \
        inptr += cnt;                                                         \
      }                                                                       \
                                                                              \
    /* Now adjust the pointers and store the result.  The store goes          \
       through memcpy so OUTPTR need not be aligned.  */                      \
    memcpy (outptr, &ch, sizeof (ch));                                        \
    outptr += sizeof (ch);                                                    \
  }

#define STORE_REST \
  {                                                                           \
    /* We store the remaining bytes while converting them into the UCS4       \
       format.  We can assume that the first byte in the buffer is            \
       correct and that it requires a larger number of bytes than there       \
       are in the input buffer.  */                                           \
    wint_t ch = **inptrp;                                                     \
    size_t cnt;                                                               \
                                                                              \
    state->__count = inend - *inptrp;                                         \
                                                                              \
    if (ch >= 0xc2 && ch < 0xe0)                                              \
      {                                                                       \
        /* We expect two bytes.  The first byte cannot be 0xc0 or             \
           0xc1, otherwise the wide character could have been                 \
           represented using a single byte.  */                               \
        cnt = 2;                                                              \
        ch &= 0x1f;                                                           \
      }                                                                       \
    else if ((ch & 0xf0) == 0xe0)                                             \
      {                                                                       \
        /* We expect three bytes.  */                                         \
        cnt = 3;                                                              \
        ch &= 0x0f;                                                           \
      }                                                                       \
    else if ((ch & 0xf8) == 0xf0)                                             \
      {                                                                       \
        /* We expect four bytes.  */                                          \
        cnt = 4;                                                              \
        ch &= 0x07;                                                           \
      }                                                                       \
    else if ((ch & 0xfc) == 0xf8)                                             \
      {                                                                       \
        /* We expect five bytes.  */                                          \
        cnt = 5;                                                              \
        ch &= 0x03;                                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* We expect six bytes.  */                                           \
        cnt = 6;                                                              \
        ch &= 0x01;                                                           \
      }                                                                       \
                                                                              \
    /* The first byte is already consumed.  */                                \
    --cnt;                                                                    \
    while (++(*inptrp) < inend)                                               \
      {                                                                       \
        ch <<= 6;                                                             \
        ch |= **inptrp & 0x3f;                                                \
        --cnt;                                                                \
      }                                                                       \
                                                                              \
    /* Shift for the so far missing bytes.  */                                \
    ch <<= cnt * 6;                                                           \
                                                                              \
    /* Store the value.  */                                                   \
    state->__value.__wch = ch;                                                \
  }

#define UNPACK_BYTES \
  {                                                                           \
    wint_t wch = state->__value.__wch;                                        \
    size_t ntotal;                                                            \
    inlen = state->__count;                                                   \
                                                                              \
    /* The sequence length is deduced from the magnitude of the saved         \
       value.  */                                                             \
    if (state->__value.__wch <= 0x7ff)                                        \
      {                                                                       \
        bytebuf[0] = 0xc0;                                                    \
        ntotal = 2;                                                           \
      }                                                                       \
    else if (state->__value.__wch <= 0xffff)                                  \
      {                                                                       \
        bytebuf[0] = 0xe0;                                                    \
        ntotal = 3;                                                           \
      }                                                                       \
    else if (state->__value.__wch <= 0x1fffff)                                \
      {                                                                       \
        bytebuf[0] = 0xf0;                                                    \
        ntotal = 4;                                                           \
      }                                                                       \
    else if (state->__value.__wch <= 0x3ffffff)                               \
      {                                                                       \
        bytebuf[0] = 0xf8;                                                    \
        ntotal = 5;                                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        bytebuf[0] = 0xfc;                                                    \
        ntotal = 6;                                                           \
      }                                                                       \
                                                                              \
    do                                                                        \
      {                                                                       \
        if (--ntotal < inlen)                                                 \
          bytebuf[ntotal] = 0x80 | (wch & 0x3f);                              \
        wch >>= 6;                                                            \
      }                                                                       \
    while (ntotal > 1);                                                       \
                                                                              \
    bytebuf[0] |= wch;                                                        \
  }
982 #include <iconv/loop.c>
983 #include <iconv/skeleton.c>
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: widen a host-order 16-bit unit to 32 bits.    \
   Load and store go through memcpy so neither pointer needs to be    \
   aligned.  */
#define BODY \
  {                                                                           \
    uint16_t ucs2_unit__;                                                     \
    uint32_t ucs2_wc__;                                                       \
                                                                              \
    memcpy (&ucs2_unit__, inptr, sizeof (ucs2_unit__));                       \
    ucs2_wc__ = ucs2_unit__;                                                  \
    memcpy (outptr, &ucs2_wc__, sizeof (ucs2_wc__));                          \
    inptr += sizeof (ucs2_unit__);                                            \
    outptr += sizeof (ucs2_wc__);                                             \
  }
1002 #include <iconv/loop.c>
1003 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: read a 32-bit character, reject values of     \
   0x10000 and above, otherwise store it as a host-order 16-bit unit. \
   Load and store go through memcpy so neither pointer needs to be    \
   aligned.  */
#define BODY \
  {                                                                           \
    uint32_t ucs2_wc__;                                                       \
    uint16_t ucs2_unit__;                                                     \
                                                                              \
    memcpy (&ucs2_wc__, inptr, sizeof (ucs2_wc__));                           \
    if (ucs2_wc__ >= 0x10000)                                                 \
      {                                                                       \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
    ucs2_unit__ = (uint16_t) ucs2_wc__;                                       \
    memcpy (outptr, &ucs2_unit__, sizeof (ucs2_unit__));                      \
    inptr += sizeof (ucs2_wc__);                                              \
    outptr += sizeof (ucs2_unit__);                                           \
  }
1029 #include <iconv/loop.c>
1030 #include <iconv/skeleton.c>
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: byte-swap a 16-bit unit and widen it to 32    \
   bits.  Load and store go through memcpy so neither pointer needs   \
   to be aligned.  */
#define BODY \
  {                                                                           \
    uint16_t ucs2r_unit__;                                                    \
    uint32_t ucs2r_wc__;                                                      \
                                                                              \
    memcpy (&ucs2r_unit__, inptr, sizeof (ucs2r_unit__));                     \
    ucs2r_wc__ = bswap_16 (ucs2r_unit__);                                     \
    memcpy (outptr, &ucs2r_wc__, sizeof (ucs2r_wc__));                        \
    inptr += 2;                                                               \
    outptr += sizeof (ucs2r_wc__);                                            \
  }
1050 #include <iconv/loop.c>
1051 #include <iconv/skeleton.c>
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* One conversion step: read a 32-bit character, reject values of     \
   0x10000 and above, otherwise store it as a byte-swapped 16-bit     \
   unit.  Load and store go through memcpy so neither pointer needs   \
   to be aligned.  */
#define BODY \
  {                                                                           \
    uint32_t val;                                                             \
    uint16_t ucs2r_unit__;                                                    \
                                                                              \
    memcpy (&val, inptr, sizeof (val));                                       \
    if (val >= 0x10000)                                                       \
      {                                                                       \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
    ucs2r_unit__ = bswap_16 ((uint16_t) val);                                 \
    memcpy (outptr, &ucs2r_unit__, sizeof (ucs2r_unit__));                    \
    outptr += sizeof (ucs2r_unit__);                                          \
    inptr += 4;                                                               \
  }
1079 #include <iconv/loop.c>
1080 #include <iconv/skeleton.c>