1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
29 #include <sys/param.h>
32 # define EILSEQ EINVAL
36 /* Lookup tables used by the UTF-8 conversion code below.  The
   internal-to-UTF-8 loop indexes both of them with `step - 2'.  */
/* Tested in that loop as `(wc & encoding_mask[step - 2]) == 0' while
   searching for the value of `step'.  */
38 static const uint32_t encoding_mask
[] =
40 ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
/* `encoding_byte[step - 2]' is what that loop stores at `*outptr'
   before the remaining bytes are written through `start'.  */
43 static const unsigned char encoding_byte
[] =
45 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
49 /* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal UCS4 format and the real UCS4
   format is, if any, the endianness.  Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.  */
/* The macros below name the loop function and the conversion entry
   point and give 4 as the minimum byte count on both sides.
   NOTE(review): presumably they parameterize <iconv/skeleton.c>, which
   is included after the loop function -- confirm against that file.  */
56 #define MIN_NEEDED_FROM 4
57 #define MIN_NEEDED_TO 4
58 #define FROM_DIRECTION 1
59 #define FROM_LOOP internal_ucs4_loop
60 #define TO_LOOP internal_ucs4_loop /* This is not used. */
61 #define FUNCTION_NAME __gconv_transform_internal_ucs4
/* Convert as many complete 4-byte characters as fit from the input
   buffer to the output buffer, turning host byte order into big-endian
   UCS4.

   INPTRP and OUTPTRP point to the current read and write positions and
   are advanced past the converted data; INEND and OUTEND mark the ends
   of the two buffers.  STATE and DATA are not used here.  If CONVERTED
   is not NULL the number of converted characters is added to
   *CONVERTED.

   Returns __GCONV_FULL_OUTPUT if the output buffer was filled exactly,
   else __GCONV_EMPTY_INPUT if all input was consumed, else
   __GCONV_INCOMPLETE_INPUT.  */
static inline int
internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, unsigned char *outend,
		    mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Only whole 4-byte units are processed, limited by whichever buffer
     has less room.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Every 32-bit unit has to be byte-swapped.  A plain store followed
     by a pointer advance is used instead of the cast-as-lvalue form
     `*((uint32_t *) outptr)++', which current compilers reject.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  /* Publish the new positions; the status test below reads them.  */
  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* The representations are identical: simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  /* Update the caller's counter.  The pointer must be dereferenced;
     incrementing the local pointer itself would lose the count.  */
  if (converted != NULL)
    *converted += n_convert;

  return result;
}
105 #include <iconv/skeleton.c>
108 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.
   An input byte greater than '\x7f' sets `result' to
   __GCONV_ILLEGAL_INPUT; any other byte is stored as one 32-bit unit
   in the output and both pointers advance.  */
109 #define DEFINE_INIT 0
110 #define DEFINE_FINI 0
111 #define MIN_NEEDED_FROM 1
112 #define MIN_NEEDED_TO 4
113 #define FROM_DIRECTION 1
114 #define FROM_LOOP ascii_internal_loop
115 #define TO_LOOP ascii_internal_loop /* This is not used. */
116 #define FUNCTION_NAME __gconv_transform_ascii_internal
118 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
119 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
120 #define LOOPFCT FROM_LOOP
123 if (*inptr > '\x7f') \
125 /* This is not a valid ANSI_X3.4-1968 character. */ \
126 result = __GCONV_ILLEGAL_INPUT; \
130 /* It's a one-byte sequence; widen it to 32 bits. */ \
131 *((uint32_t *) outptr)++ = *inptr++; \
133 #include <iconv/loop.c>
134 #include <iconv/skeleton.c>
137 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.
   A 32-bit input unit greater than 0x7f sets `result' to
   __GCONV_ILLEGAL_INPUT; any other unit is stored as a single output
   byte.  */
138 #define DEFINE_INIT 0
139 #define DEFINE_FINI 0
140 #define MIN_NEEDED_FROM 4
141 #define MIN_NEEDED_TO 1
142 #define FROM_DIRECTION 1
143 #define FROM_LOOP internal_ascii_loop
144 #define TO_LOOP internal_ascii_loop /* This is not used. */
145 #define FUNCTION_NAME __gconv_transform_internal_ascii
147 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
148 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
149 #define LOOPFCT FROM_LOOP
152 if (*((uint32_t *) inptr) > 0x7f) \
154 /* This is not a valid ANSI_X3.4-1968 character. */ \
155 result = __GCONV_ILLEGAL_INPUT; \
159 /* It's a one-byte sequence; narrow the 32-bit unit to a byte. */ \
160 *outptr++ = *((uint32_t *) inptr)++; \
162 #include <iconv/loop.c>
163 #include <iconv/skeleton.c>
166 /* Convert from the internal (UCS4-like) format to UTF-8.
   Each character is read as a 32-bit unit and asserted to be at most
   0x7fffffff.  For multi-byte output a loop over `step' (2 to 5) tests
   the character against encoding_mask[step - 2], encoding_byte[step - 2]
   is stored at `*outptr', and bytes of the form 0x80 | (wc & 0x3f) are
   written through `start'.  When the output does not have enough room
   `result' is set to __GCONV_FULL_OUTPUT.
   NOTE(review): the room test `outptr + step >= outend' also fires when
   the sequence would exactly fill the remaining output; `>' looks
   intended -- confirm against the complete macro body.  */
167 #define DEFINE_INIT 0
168 #define DEFINE_FINI 0
169 #define MIN_NEEDED_FROM 4
170 #define MIN_NEEDED_TO 1
171 #define MAX_NEEDED_TO 6
172 #define FROM_DIRECTION 1
173 #define FROM_LOOP internal_utf8_loop
174 #define TO_LOOP internal_utf8_loop /* This is not used. */
175 #define FUNCTION_NAME __gconv_transform_internal_utf8
177 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
178 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
179 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
180 #define LOOPFCT FROM_LOOP
183 uint32_t wc = *((uint32_t *) inptr); \
185 /* Since we control every character we read, this cannot happen. */ \
186 assert (wc <= 0x7fffffff); \
189 /* It's a one-byte sequence. */ \
190 *outptr++ = (unsigned char) wc; \
196 for (step = 2; step < 6; ++step) \
197 if ((wc & encoding_mask[step - 2]) == 0) \
200 if (outptr + step >= outend) \
203 result = __GCONV_FULL_OUTPUT; \
208 *outptr = encoding_byte[step - 2]; \
213 start[step] = 0x80 | (wc & 0x3f); \
216 while (--step > 0); \
222 #include <iconv/loop.c>
223 #include <iconv/skeleton.c>
226 /* Convert from UTF-8 to the internal (UCS4-like) format.
   The first byte `ch' is classified with the masks 0xe0, 0xf0, 0xf8,
   0xfc and 0xfe; a byte matching none of the visible patterns sets
   `result' to __GCONV_ILLEGAL_INPUT.  If `inptr + cnt' lies beyond
   `inend' (checked only when NEED_LENGTH_TEST is set) `result' becomes
   __GCONV_INCOMPLETE_INPUT.  Every following byte must satisfy
   (byte & 0xc0) == 0x80, otherwise the input is illegal.  The decoded
   value `ch' is finally stored as one 32-bit output unit.  */
227 #define DEFINE_INIT 0
228 #define DEFINE_FINI 0
229 #define MIN_NEEDED_FROM 1
230 #define MAX_NEEDED_FROM 6
231 #define MIN_NEEDED_TO 4
232 #define FROM_DIRECTION 1
233 #define FROM_LOOP utf8_internal_loop
234 #define TO_LOOP utf8_internal_loop /* This is not used. */
235 #define FUNCTION_NAME __gconv_transform_utf8_internal
237 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
238 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
239 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
240 #define LOOPFCT FROM_LOOP
247 /* Next input byte. */ \
252 /* One-byte sequence. */ \
258 if ((ch & 0xe0) == 0xc0) \
263 else if ((ch & 0xf0) == 0xe0) \
265 /* We expect three bytes. */ \
269 else if ((ch & 0xf8) == 0xf0) \
271 /* We expect four bytes. */ \
275 else if ((ch & 0xfc) == 0xf8) \
277 /* We expect five bytes. */ \
281 else if ((ch & 0xfe) == 0xfc) \
283 /* We expect six bytes. */ \
289 /* The first byte matches no valid pattern: illegal encoding. */ \
290 result = __GCONV_ILLEGAL_INPUT; \
294 if (NEED_LENGTH_TEST && inptr + cnt > inend) \
296 /* We don't have enough input. */ \
297 result = __GCONV_INCOMPLETE_INPUT; \
301 /* Read the possible remaining bytes. */ \
302 for (i = 1; i < cnt; ++i) \
304 uint32_t byte = inptr[i]; \
306 if ((byte & 0xc0) != 0x80) \
308 /* Not of the form 10xxxxxx: illegal encoding. */ \
309 result = __GCONV_ILLEGAL_INPUT; \
319 /* Now adjust the pointers and store the result. */ \
320 *((uint32_t *) outptr)++ = ch; \
322 #include <iconv/loop.c>
323 #include <iconv/skeleton.c>
326 /* Convert from UCS2 to the internal (UCS4-like) format.
   Each 16-bit input unit is widened to one 32-bit output unit.  The
   variant following `#if __BYTE_ORDER == __LITTLE_ENDIAN' byte-swaps
   the unit with bswap_16 first; the other variant stores it unchanged.
   NOTE(review): the second variant is presumably the big-endian branch;
   the `#else'/`#endif' lines are not visible here -- confirm.  */
327 #define DEFINE_INIT 0
328 #define DEFINE_FINI 0
329 #define MIN_NEEDED_FROM 2
330 #define MIN_NEEDED_TO 4
331 #define FROM_DIRECTION 1
332 #define FROM_LOOP ucs2_internal_loop
333 #define TO_LOOP ucs2_internal_loop /* This is not used. */
334 #define FUNCTION_NAME __gconv_transform_ucs2_internal
336 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
337 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
338 #define LOOPFCT FROM_LOOP
339 #if __BYTE_ORDER == __LITTLE_ENDIAN
341 *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr); \
345 *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
347 #include <iconv/loop.c>
348 #include <iconv/skeleton.c>
351 /* Convert from the internal (UCS4-like) format to UCS2.
   In both byte-order variants a 32-bit input unit of 0x10000 or more
   sets `result' to __GCONV_ILLEGAL_INPUT; otherwise one 16-bit unit is
   written.  The variant following `#if __BYTE_ORDER == __LITTLE_ENDIAN'
   byte-swaps the value with bswap_16, the other stores it unchanged.  */
352 #define DEFINE_INIT 0
353 #define DEFINE_FINI 0
354 #define MIN_NEEDED_FROM 4
355 #define MIN_NEEDED_TO 2
356 #define FROM_DIRECTION 1
357 #define FROM_LOOP internal_ucs2_loop
358 #define TO_LOOP internal_ucs2_loop /* This is not used. */
359 #define FUNCTION_NAME __gconv_transform_internal_ucs2
361 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
362 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
363 #define LOOPFCT FROM_LOOP
364 #if __BYTE_ORDER == __LITTLE_ENDIAN
367 if (*((uint32_t *) inptr) >= 0x10000) \
369 result = __GCONV_ILLEGAL_INPUT; \
372 /* Please note that we use the `uint32_t' from-pointer as an `uint16_t' \
373 pointer which works since we are on a little endian machine. */ \
374 *((uint16_t *) outptr)++ = bswap_16 (*((uint16_t *) inptr)); \
380 if (*((uint32_t *) inptr) >= 0x10000) \
382 result = __GCONV_ILLEGAL_INPUT; \
385 *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
388 #include <iconv/loop.c>
389 #include <iconv/skeleton.c>
392 /* Convert from UCS2 in little endian to the internal (UCS4-like)
   format.  Each 16-bit input unit is widened to one 32-bit output
   unit.  The variant following `#if __BYTE_ORDER == __LITTLE_ENDIAN'
   stores the unit unchanged; the other variant byte-swaps it with
   bswap_16 first.  */
393 #define DEFINE_INIT 0
394 #define DEFINE_FINI 0
395 #define MIN_NEEDED_FROM 2
396 #define MIN_NEEDED_TO 4
397 #define FROM_DIRECTION 1
398 #define FROM_LOOP ucs2little_internal_loop
399 #define TO_LOOP ucs2little_internal_loop /* This is not used.*/
400 #define FUNCTION_NAME __gconv_transform_ucs2little_internal
402 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
403 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
404 #define LOOPFCT FROM_LOOP
405 #if __BYTE_ORDER == __LITTLE_ENDIAN
407 *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
410 *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr); \
413 #include <iconv/loop.c>
414 #include <iconv/skeleton.c>
417 /* Convert from the internal (UCS4-like) format to UCS2 in little
   endian.  In both byte-order variants a 32-bit input unit of 0x10000
   or more sets `result' to __GCONV_ILLEGAL_INPUT.  The variant
   following `#if __BYTE_ORDER == __LITTLE_ENDIAN' stores the value
   unchanged as a 16-bit unit; the other variant reads the second
   16-bit half of the input unit and byte-swaps it with bswap_16.  */
418 #define DEFINE_INIT 0
419 #define DEFINE_FINI 0
420 #define MIN_NEEDED_FROM 4
421 #define MIN_NEEDED_TO 2
422 #define FROM_DIRECTION 1
423 #define FROM_LOOP internal_ucs2little_loop
424 #define TO_LOOP internal_ucs2little_loop /* This is not used.*/
425 #define FUNCTION_NAME __gconv_transform_internal_ucs2little
427 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
428 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
429 #define LOOPFCT FROM_LOOP
430 #if __BYTE_ORDER == __LITTLE_ENDIAN
433 if (*((uint32_t *) inptr) >= 0x10000) \
435 result = __GCONV_ILLEGAL_INPUT; \
438 *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
443 if (*((uint32_t *) inptr) >= 0x10000) \
445 result = __GCONV_ILLEGAL_INPUT; \
448 *((uint16_t *) outptr)++ = bswap_16 (((uint16_t *) inptr)[1]); \
452 #include <iconv/loop.c>
453 #include <iconv/skeleton.c>
456 /* Convert from the internal (UCS4-like) format to UTF-16.
   A 32-bit input unit below 0x10000 is written as one 16-bit unit.
   A value of 0x110000 or more sets `result' to __GCONV_ILLEGAL_INPUT.
   Values in between are written as two 16-bit units, the first being
   0xd7c0 + (value >> 10) and the second based on 0xdc00; if fewer than
   four output bytes remain (checked only when NEED_LENGTH_TEST is set)
   `result' becomes __GCONV_FULL_OUTPUT.  The variant following
   `#if __BYTE_ORDER == __LITTLE_ENDIAN' additionally passes every
   16-bit unit through bswap_16.  */
457 #define DEFINE_INIT 0
458 #define DEFINE_FINI 0
459 #define MIN_NEEDED_FROM 4
460 #define MIN_NEEDED_TO 2
461 #define MAX_NEEDED_TO 4
462 #define FROM_DIRECTION 1
463 #define FROM_LOOP internal_utf16_loop
464 #define TO_LOOP internal_utf16_loop /* This is not used. */
465 #define FUNCTION_NAME __gconv_transform_internal_utf16
467 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
468 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
469 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
470 #define LOOPFCT FROM_LOOP
471 #if __BYTE_ORDER == __LITTLE_ENDIAN
474 if (*((uint32_t *) inptr) >= 0x10000) \
476 if (*((uint32_t *) inptr) >= 0x110000) \
478 result = __GCONV_ILLEGAL_INPUT; \
482 /* Generate a surrogate pair. */ \
483 if (NEED_LENGTH_TEST && outptr + 4 > outend) \
485 /* Overflow in the output buffer. */ \
486 result = __GCONV_FULL_OUTPUT; \
490 *((uint16_t *) outptr)++ = bswap_16 (0xd7c0 \
491 + (*((uint32_t *) inptr) >> 10));\
492 *((uint16_t *) outptr)++ = bswap_16 (0xdc00 \
493 + (*((uint32_t *) inptr) \
497 /* Please note that we use the `uint32_t' from-pointer as an `uint16_t' \
498 pointer which works since we are on a little endian machine. */ \
499 *((uint16_t *) outptr)++ = bswap_16 (*((uint16_t *) inptr)); \
505 if (*((uint32_t *) inptr) >= 0x10000) \
507 if (*((uint32_t *) inptr) >= 0x110000) \
509 result = __GCONV_ILLEGAL_INPUT; \
513 /* Generate a surrogate pair. */ \
514 if (NEED_LENGTH_TEST && outptr + 4 > outend) \
516 /* Overflow in the output buffer. */ \
517 result = __GCONV_FULL_OUTPUT; \
521 *((uint16_t *) outptr)++ = 0xd7c0 + (*((uint32_t *) inptr) >> 10); \
522 *((uint16_t *) outptr)++ = 0xdc00 + (*((uint32_t *) inptr) & 0x3ff); \
525 *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
528 #include <iconv/loop.c>
529 #include <iconv/skeleton.c>
532 /* Convert from UTF-16 to the internal (UCS4-like) format.
   The first 16-bit unit `u1' (byte-swapped with bswap_16 in the variant
   following `#if __BYTE_ORDER == __LITTLE_ENDIAN') is stored directly
   as a 32-bit unit when it lies outside 0xd800..0xdfff.  Otherwise a
   second unit `u2' is needed: if fewer than four input bytes remain
   (checked only when NEED_LENGTH_TEST is set) `result' becomes
   __GCONV_INCOMPLETE_INPUT, an unacceptable `u2' gives
   __GCONV_ILLEGAL_INPUT, and a valid pair is combined as
   ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00).
   NOTE(review): the test `u2 >= 0xdfff' also rejects u2 == 0xdfff;
   `u2 > 0xdfff' looks intended -- confirm.
   NOTE(review): any `u1' in 0xdc00..0xdfff is accepted as the first
   word of a pair as well -- confirm whether that is wanted.  */
533 #define DEFINE_INIT 0
534 #define DEFINE_FINI 0
535 #define MIN_NEEDED_FROM 2
536 #define MAX_NEEDED_FROM 4
537 #define MIN_NEEDED_TO 4
538 #define FROM_DIRECTION 1
539 #define FROM_LOOP utf16_internal_loop
540 #define TO_LOOP utf16_internal_loop /* This is not used.*/
541 #define FUNCTION_NAME __gconv_transform_utf16_internal
543 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
544 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
545 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
546 #define LOOPFCT FROM_LOOP
547 #if __BYTE_ORDER == __LITTLE_ENDIAN
550 uint16_t u1 = bswap_16 (*(uint16_t *) inptr); \
552 if (u1 < 0xd800 || u1 > 0xdfff) \
554 /* No surrogate. */ \
555 *((uint32_t *) outptr)++ = u1; \
562 /* It's a surrogate character. At least the first word says \
564 if (NEED_LENGTH_TEST && inptr + 4 > inend) \
566 /* We don't have enough input for another complete input \
568 result = __GCONV_INCOMPLETE_INPUT; \
572 u2 = bswap_16 (((uint16_t *) inptr)[1]); \
573 if (u2 < 0xdc00 || u2 >= 0xdfff) \
575 /* This is no valid second word for a surrogate. */ \
576 result = __GCONV_ILLEGAL_INPUT; \
580 *((uint32_t *) outptr)++ = ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00); \
587 uint16_t u1 = *(uint16_t *) inptr; \
589 if (u1 < 0xd800 || u1 > 0xdfff) \
591 /* No surrogate. */ \
592 *((uint32_t *) outptr)++ = u1; \
599 /* It's a surrogate character. At least the first word says \
601 if (NEED_LENGTH_TEST && inptr + 4 > inend) \
603 /* We don't have enough input for another complete input \
605 result = __GCONV_INCOMPLETE_INPUT; \
609 u2 = ((uint16_t *) inptr)[1]; \
610 if (u2 < 0xdc00 || u2 >= 0xdfff) \
612 /* This is no valid second word for a surrogate. */ \
613 result = __GCONV_ILLEGAL_INPUT; \
617 *((uint32_t *) outptr)++ = ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00); \
622 #include <iconv/loop.c>
623 #include <iconv/skeleton.c>