1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 2011 Jeffrey Stedfast
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use, copy,
9 * modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
32 #include "../utils/mono-errno.h"
/* FORCE_INLINE(RET_TYPE): wraps a function's return type with a
 * "always inline" request. Two spellings are visible: __forceinline and the
 * inline + __attribute__((always_inline)) form. The preprocessor conditional
 * that selects between them is not visible in this excerpt. */
35 #define FORCE_INLINE(RET_TYPE) __forceinline RET_TYPE
37 #define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
/* Compile-time switches tested with #if in decode_utf8 / encode_utf8 to pick
 * the unrolled switch form over the loop form. Both are 0 here. */
41 #define UNROLL_DECODE_UTF8 0
42 #define UNROLL_ENCODE_UTF8 0
/* Decoder: callback that reads one character from inbuf (inleft bytes are
 * available) and stores it through outchar. Call sites in this file treat a
 * negative return value as failure; the non-negative value is presumably the
 * number of input bytes consumed -- confirm against the callers. */
44 typedef int (* Decoder
) (char *inbuf
, size_t inleft
, gunichar
*outchar
);
/* Encoder: callback that writes character c into outbuf (outleft bytes are
 * available). Call sites in this file treat a negative return value as
 * failure; the non-negative value is presumably the number of bytes written
 * -- confirm against the callers. */
45 typedef int (* Encoder
) (gunichar c
, char *outbuf
, size_t outleft
);
/* Forward declarations of the per-charset decode/encode routines. They are
 * referenced by the charset table before their definitions appear.
 * Every decoder has the Decoder shape and every encoder the Encoder shape;
 * decode_utf8 is additionally declared FORCE_INLINE. */
56 static int decode_utf32be (char *inbuf
, size_t inleft
, gunichar
*outchar
);
57 static int encode_utf32be (gunichar c
, char *outbuf
, size_t outleft
);
59 static int decode_utf32le (char *inbuf
, size_t inleft
, gunichar
*outchar
);
60 static int encode_utf32le (gunichar c
, char *outbuf
, size_t outleft
);
62 static int decode_utf16be (char *inbuf
, size_t inleft
, gunichar
*outchar
);
63 static int encode_utf16be (gunichar c
, char *outbuf
, size_t outleft
);
65 static int decode_utf16le (char *inbuf
, size_t inleft
, gunichar
*outchar
);
66 static int encode_utf16le (gunichar c
, char *outbuf
, size_t outleft
);
68 static FORCE_INLINE (int) decode_utf8 (char *inbuf
, size_t inleft
, gunichar
*outchar
);
69 static int encode_utf8 (gunichar c
, char *outbuf
, size_t outleft
);
71 static int decode_latin1 (char *inbuf
, size_t inleft
, gunichar
*outchar
);
72 static int encode_latin1 (gunichar c
, char *outbuf
, size_t outleft
);
/* Host-byte-order aliases: the unsuffixed decode/encode_utf32 and
 * decode/encode_utf16 names map to the little-endian routines when
 * G_BYTE_ORDER == G_LITTLE_ENDIAN. The second group maps them to the
 * big-endian routines; it presumably sits in an #else branch whose
 * #else/#endif lines are not visible in this excerpt. */
74 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
75 #define decode_utf32 decode_utf32le
76 #define encode_utf32 encode_utf32le
77 #define decode_utf16 decode_utf16le
78 #define encode_utf16 encode_utf16le
80 #define decode_utf32 decode_utf32be
81 #define encode_utf32 encode_utf32be
82 #define decode_utf16 decode_utf16be
83 #define encode_utf16 encode_utf16be
/* Rows of the built-in charset table: { name, decoder, encoder }.
 * g_iconv_open compares the name against the requested charsets with
 * g_ascii_strcasecmp, so matching is case-insensitive.
 * "US-ASCII" and "ASCII" reuse the Latin-1 routines, and the unsuffixed
 * UTF-16/UTF-32 names use the host-byte-order aliases.
 * The table's declaration header is not visible in this excerpt. */
91 { "ISO-8859-1", decode_latin1
, encode_latin1
},
92 { "ISO8859-1", decode_latin1
, encode_latin1
},
93 { "UTF-32BE", decode_utf32be
, encode_utf32be
},
94 { "UTF-32LE", decode_utf32le
, encode_utf32le
},
95 { "UTF-16BE", decode_utf16be
, encode_utf16be
},
96 { "UTF-16LE", decode_utf16le
, encode_utf16le
},
97 { "UTF-32", decode_utf32
, encode_utf32
},
98 { "UTF-16", decode_utf16
, encode_utf16
},
99 { "UTF-8", decode_utf8
, encode_utf8
},
100 { "US-ASCII", decode_latin1
, encode_latin1
},
101 { "Latin1", decode_latin1
, encode_latin1
},
102 { "ASCII", decode_latin1
, encode_latin1
},
103 { "UTF32", decode_utf32
, encode_utf32
},
104 { "UTF16", decode_utf16
, encode_utf16
},
105 { "UTF8", decode_utf8
, encode_utf8
},
/* Opens a converter from from_charset to to_charset.
 *
 * Visible behaviour:
 *  - NULL or empty charset names set errno to EINVAL;
 *  - the built-in charset table is searched for a decoder (from_charset) and
 *    an encoder (to_charset);
 *  - when either is missing, iconv_open is tried, and a separate path sets
 *    errno to EINVAL (presumably the build without system iconv -- the
 *    surrounding preprocessor lines are not visible in this excerpt);
 *  - on success a struct _GIConv is allocated with g_malloc and its
 *    decode/encode callbacks are filled in.
 *
 * The return statements are not visible here; g_convert in this file tests
 * the result against (GIConv) -1 to detect failure. */
110 g_iconv_open (const char *to_charset
, const char *from_charset
)
113 iconv_t icd
= (iconv_t
) -1;
115 Decoder decoder
= NULL
;
116 Encoder encoder
= NULL
;
/* reject missing or empty charset names */
120 if (!to_charset
|| !from_charset
|| !to_charset
[0] || !from_charset
[0]) {
121 mono_set_errno (EINVAL
);
/* scan the whole table: the source and target names are matched
 * independently, each case-insensitively */
126 for (i
= 0; i
< G_N_ELEMENTS (charsets
); i
++) {
127 if (!g_ascii_strcasecmp (charsets
[i
].name
, from_charset
))
128 decoder
= charsets
[i
].decoder
;
130 if (!g_ascii_strcasecmp (charsets
[i
].name
, to_charset
))
131 encoder
= charsets
[i
].encoder
;
/* no built-in codec pair: fall back to the system iconv */
134 if (!encoder
|| !decoder
) {
136 if ((icd
= iconv_open (to_charset
, from_charset
)) == (iconv_t
) -1)
139 mono_set_errno (EINVAL
);
145 cd
= (GIConv
) g_malloc (sizeof (struct _GIConv
));
146 cd
->decode
= decoder
;
147 cd
->encode
= encoder
;
/* Closes a converter. When the embedded system iconv descriptor is valid
 * (not (iconv_t) -1) it is released with iconv_close. The remainder of the
 * body (including the return value and any release of cd itself) is not
 * visible in this excerpt. */
158 g_iconv_close (GIConv cd
)
161 if (cd
->cd
!= (iconv_t
) -1)
162 iconv_close (cd
->cd
);
/* Converts bytes from *inbytes into *outbytes using converter cd.
 *
 * Visible behaviour:
 *  - when cd carries a valid system iconv descriptor, the call is forwarded
 *    to iconv() with the byte counts copied into local size_t variables;
 *  - otherwise, outbytes == NULL or outbytesleft == NULL is the "reset
 *    converter" request;
 *  - otherwise characters are decoded with cd->decode and written with
 *    cd->encode. A character saved in cd->c (anything other than
 *    (gunichar) -1) is picked up first, presumably to retry an encode that
 *    did not fit on a previous call -- the loop structure is not visible in
 *    this excerpt;
 *  - the remaining counts are stored back through inbytesleft/outbytesleft.
 *
 * Returns -1 when the last decode/encode step reported a negative result,
 * 0 otherwise (built-in path). */
171 g_iconv (GIConv cd
, gchar
**inbytes
, gsize
*inbytesleft
,
172 gchar
**outbytes
, gsize
*outbytesleft
)
174 gsize inleft
, outleft
;
175 char *inptr
, *outptr
;
180 if (cd
->cd
!= (iconv_t
) -1) {
181 /* Note: gsize may have a different size than size_t, so we need to
182 remap inbytesleft and outbytesleft to size_t's. */
183 size_t *outleftptr
, *inleftptr
;
184 size_t n_outleft
, n_inleft
;
187 n_inleft
= *inbytesleft
;
188 inleftptr
= &n_inleft
;
194 n_outleft
= *outbytesleft
;
195 outleftptr
= &n_outleft
;
/* NOTE(review): iconv() updates n_inleft/n_outleft through the local
 * pointers, and its result is returned directly. No copy of the updated
 * counts back into *inbytesleft / *outbytesleft is visible in this excerpt.
 * Confirm, since g_convert reads its inleft/outleft after the call. */
199 #if defined(__NetBSD__)
200 return iconv (cd
->cd
, (const gchar
**)inbytes
, inleftptr
, outbytes
, outleftptr
);
202 return iconv (cd
->cd
, inbytes
, inleftptr
, outbytes
, outleftptr
);
207 if (outbytes
== NULL
|| outbytesleft
== NULL
) {
208 /* reset converter */
/* NULL inbytes/inbytesleft are tolerated here: they yield no input */
213 inleft
= inbytesleft
? *inbytesleft
: 0;
214 inptr
= inbytes
? *inbytes
: NULL
;
215 outleft
= *outbytesleft
;
/* a character left over in the converter state is handled first */
218 if ((c
= cd
->c
) != (gunichar
) -1)
222 if ((rc
= cd
->decode (inptr
, inleft
, &c
)) < 0)
229 if ((rc
= cd
->encode (c
, outptr
, outleft
)) < 0)
238 *inbytesleft
= inleft
;
243 *outbytesleft
= outleft
;
247 return rc
< 0 ? -1 : 0;
251 * Unicode encoders and decoders
254 static FORCE_INLINE (uint32_t)
255 read_uint32_endian (unsigned char *inptr
, unsigned endian
)
257 if (endian
== G_LITTLE_ENDIAN
)
258 return (inptr
[3] << 24) | (inptr
[2] << 16) | (inptr
[1] << 8) | inptr
[0];
259 return (inptr
[0] << 24) | (inptr
[1] << 16) | (inptr
[2] << 8) | inptr
[3];
/* Decodes one UTF-32 code unit from inbuf in the requested byte order.
 *
 * Visible behaviour: errno is set to EINVAL on an early-exit path whose
 * guarding condition is not visible in this excerpt (presumably fewer than
 * four input bytes). After the value is read, errno is set to EILSEQ when it
 * lies in the surrogate range 0xd800..0xdfff or is >= 0x110000. */
263 decode_utf32_endian (char *inbuf
, size_t inleft
, gunichar
*outchar
, unsigned endian
)
265 unsigned char *inptr
= (unsigned char *) inbuf
;
269 mono_set_errno (EINVAL
);
273 c
= read_uint32_endian (inptr
, endian
);
/* surrogates are not valid as stand-alone code points */
275 if (c
>= 0xd800 && c
< 0xe000) {
276 mono_set_errno (EILSEQ
);
/* beyond the last Unicode code point (0x10ffff) */
278 } else if (c
>= 0x110000) {
279 mono_set_errno (EILSEQ
);
/* Big-endian UTF-32 decoder: forwards to decode_utf32_endian with
 * G_BIG_ENDIAN and returns its result. */
289 decode_utf32be (char *inbuf
, size_t inleft
, gunichar
*outchar
)
291 return decode_utf32_endian (inbuf
, inleft
, outchar
, G_BIG_ENDIAN
);
/* Little-endian UTF-32 decoder: forwards to decode_utf32_endian with
 * G_LITTLE_ENDIAN and returns its result. */
295 decode_utf32le (char *inbuf
, size_t inleft
, gunichar
*outchar
)
297 return decode_utf32_endian (inbuf
, inleft
, outchar
, G_LITTLE_ENDIAN
);
/* Stores c into outbuf as four bytes, most significant byte first.
 * errno is set to E2BIG on an early-exit path whose guarding condition is not
 * visible in this excerpt (presumably fewer than four bytes of output
 * space). The return statements are not visible here. */
301 encode_utf32be (gunichar c
, char *outbuf
, size_t outleft
)
303 unsigned char *outptr
= (unsigned char *) outbuf
;
306 mono_set_errno (E2BIG
);
310 outptr
[0] = (c
>> 24) & 0xff;
311 outptr
[1] = (c
>> 16) & 0xff;
312 outptr
[2] = (c
>> 8) & 0xff;
313 outptr
[3] = c
& 0xff;
/* Stores c into outbuf as four bytes, least significant byte first.
 * errno is set to E2BIG on an early-exit path whose guarding condition is not
 * visible in this excerpt (presumably fewer than four bytes of output
 * space). The return statements are not visible here. */
319 encode_utf32le (gunichar c
, char *outbuf
, size_t outleft
)
321 unsigned char *outptr
= (unsigned char *) outbuf
;
324 mono_set_errno (E2BIG
);
328 outptr
[0] = c
& 0xff;
329 outptr
[1] = (c
>> 8) & 0xff;
330 outptr
[2] = (c
>> 16) & 0xff;
331 outptr
[3] = (c
>> 24) & 0xff;
336 static FORCE_INLINE (uint16_t)
337 read_uint16_endian (unsigned char *inptr
, unsigned endian
)
339 if (endian
== G_LITTLE_ENDIAN
)
340 return (inptr
[1] << 8) | inptr
[0];
341 return (inptr
[0] << 8) | inptr
[1];
/* Decodes one UTF-16 character (a single unit or a surrogate pair) from inbuf
 * in the requested byte order.
 *
 * Visible behaviour:
 *  - errno is set to E2BIG on the first early-exit path (condition not
 *    visible in this excerpt; presumably fewer than two input bytes);
 *  - a first unit below 0xdc00 that is not in the plain range is a high
 *    surrogate: errno is set to EINVAL on a path whose condition is not
 *    visible (presumably the second unit is missing), the second unit is read
 *    from inptr + 2 and must lie in 0xdc00..0xdfff, otherwise EILSEQ;
 *  - a first unit in 0xdc00..0xdfff (lone low surrogate) sets EILSEQ.
 *
 * NOTE(review): decode_utf32_endian sets EINVAL at its corresponding first
 * early exit, whereas E2BIG is used here -- confirm which is intended for
 * short input. */
344 static FORCE_INLINE (int)
345 decode_utf16_endian (char *inbuf
, size_t inleft
, gunichar
*outchar
, unsigned endian
)
347 unsigned char *inptr
= (unsigned char *) inbuf
;
352 mono_set_errno (E2BIG
);
356 u
= read_uint16_endian (inptr
, endian
);
359 /* 0x0000 -> 0xd7ff */
362 } else if (u
< 0xdc00) {
363 /* 0xd800 -> 0xdbff */
365 mono_set_errno (EINVAL
);
369 c
= read_uint16_endian (inptr
+ 2, endian
);
371 if (c
< 0xdc00 || c
> 0xdfff) {
372 mono_set_errno (EILSEQ
);
/* combine the pair: 10 bits from each surrogate, offset by 0x10000 */
376 u
= ((u
- 0xd800) << 10) + (c
- 0xdc00) + 0x0010000UL
;
380 } else if (u
< 0xe000) {
381 /* 0xdc00 -> 0xdfff */
382 mono_set_errno (EILSEQ
);
385 /* 0xe000 -> 0xffff */
/* Big-endian UTF-16 decoder: forwards to decode_utf16_endian with
 * G_BIG_ENDIAN and returns its result. */
392 decode_utf16be (char *inbuf
, size_t inleft
, gunichar
*outchar
)
394 return decode_utf16_endian (inbuf
, inleft
, outchar
, G_BIG_ENDIAN
);
/* Little-endian UTF-16 decoder: forwards to decode_utf16_endian with
 * G_LITTLE_ENDIAN and returns its result. */
398 decode_utf16le (char *inbuf
, size_t inleft
, gunichar
*outchar
)
400 return decode_utf16_endian (inbuf
, inleft
, outchar
, G_LITTLE_ENDIAN
);
/* Stores the 16-bit value c into outptr[0..1].
 * For G_LITTLE_ENDIAN the low byte is written first; the second pair of
 * stores writes the high byte first. The lines separating the two groups are
 * not visible in this excerpt; presumably the little-endian branch returns
 * before the big-endian stores -- confirm. */
403 static FORCE_INLINE (void)
404 write_uint16_endian (unsigned char *outptr
, uint16_t c
, unsigned endian
)
406 if (endian
== G_LITTLE_ENDIAN
) {
407 outptr
[0] = c
& 0xff;
408 outptr
[1] = (c
>> 8) & 0xff;
411 outptr
[0] = (c
>> 8) & 0xff;
412 outptr
[1] = c
& 0xff;
/* Encodes c into outbuf as UTF-16 in the requested byte order: either a
 * single 16-bit unit, or a surrogate pair written at outptr and outptr + 2.
 *
 * errno is set to E2BIG on two early-exit paths, one before each form; their
 * guarding conditions are not visible in this excerpt (presumably
 * insufficient output space). The surrogate pair is derived from c2, whose
 * assignment is also not visible (presumably c - 0x10000 -- confirm). */
415 static FORCE_INLINE (int)
416 encode_utf16_endian (gunichar c
, char *outbuf
, size_t outleft
, unsigned endian
)
418 unsigned char *outptr
= (unsigned char *) outbuf
;
424 mono_set_errno (E2BIG
);
428 write_uint16_endian (outptr
, c
, endian
);
432 mono_set_errno (E2BIG
);
/* high surrogate: top 10 bits of c2 */
438 ch
= (gunichar2
) ((c2
>> 10) + 0xd800);
439 write_uint16_endian (outptr
, ch
, endian
);
/* low surrogate: bottom 10 bits of c2 */
441 ch
= (gunichar2
) ((c2
& 0x3ff) + 0xdc00);
442 write_uint16_endian (outptr
+ 2, ch
, endian
);
/* Big-endian UTF-16 encoder: forwards to encode_utf16_endian with
 * G_BIG_ENDIAN and returns its result. */
448 encode_utf16be (gunichar c
, char *outbuf
, size_t outleft
)
450 return encode_utf16_endian (c
, outbuf
, outleft
, G_BIG_ENDIAN
);
/* Little-endian UTF-16 encoder: forwards to encode_utf16_endian with
 * G_LITTLE_ENDIAN and returns its result. */
454 encode_utf16le (gunichar c
, char *outbuf
, size_t outleft
)
456 return encode_utf16_endian (c
, outbuf
, outleft
, G_LITTLE_ENDIAN
);
/* Decodes one UTF-8 sequence from inbuf.
 *
 * Visible behaviour: the lead byte is classified by a chain of thresholds
 * (0xc2, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe). A lead byte below 0xc2 that is not
 * plain ASCII sets errno to EILSEQ, as does the final fall-through branch.
 * errno is set to EINVAL on a later path whose condition is not visible in
 * this excerpt (presumably fewer input bytes than the sequence needs).
 * The trailing bytes are then folded in six bits at a time, either with an
 * unrolled switch (UNROLL_DECODE_UTF8) or with a loop over n - 1 bytes.
 *
 * NOTE(review): each trailing byte is XORed with 0x80 and merged; no check
 * that it has the 10xxxxxx form is visible in this excerpt -- confirm. */
459 static FORCE_INLINE (int)
460 decode_utf8 (char *inbuf
, size_t inleft
, gunichar
*outchar
)
462 unsigned char *inptr
= (unsigned char *) inbuf
;
469 /* simple ascii case */
472 } else if (u
< 0xc2) {
473 mono_set_errno (EILSEQ
);
475 } else if (u
< 0xe0) {
478 } else if (u
< 0xf0) {
481 } else if (u
< 0xf8) {
484 } else if (u
< 0xfc) {
487 } else if (u
< 0xfe) {
491 mono_set_errno (EILSEQ
);
496 mono_set_errno (EINVAL
);
500 #if UNROLL_DECODE_UTF8
502 case 6: u
= (u
<< 6) | (*++inptr
^ 0x80);
503 case 5: u
= (u
<< 6) | (*++inptr
^ 0x80);
504 case 4: u
= (u
<< 6) | (*++inptr
^ 0x80);
505 case 3: u
= (u
<< 6) | (*++inptr
^ 0x80);
506 case 2: u
= (u
<< 6) | (*++inptr
^ 0x80);
509 for (i
= 1; i
< n
; i
++)
510 u
= (u
<< 6) | (*++inptr
^ 0x80);
/* Encodes c into outbuf as UTF-8.
 *
 * Visible behaviour: the sequence length is chosen by a chain of thresholds
 * (0x800, 0x10000, 0x200000, 0x4000000). errno is set to E2BIG on a path
 * whose condition is not visible in this excerpt (presumably the sequence
 * does not fit in outleft). Trailing bytes are written last-to-first, each
 * carrying six bits of c tagged with 0x80, and the lead byte is c's remaining
 * bits OR-ed with base. Either the unrolled switch (UNROLL_ENCODE_UTF8) or
 * the loop form is compiled. */
519 encode_utf8 (gunichar c
, char *outbuf
, size_t outleft
)
521 unsigned char *outptr
= (unsigned char *) outbuf
;
527 } else if (c
< 0x800) {
530 } else if (c
< 0x10000) {
533 } else if (c
< 0x200000) {
536 } else if (c
< 0x4000000) {
545 mono_set_errno (E2BIG
);
549 #if UNROLL_ENCODE_UTF8
551 case 6: outptr
[5] = (c
& 0x3f) | 0x80; c
>>= 6;
552 case 5: outptr
[4] = (c
& 0x3f) | 0x80; c
>>= 6;
553 case 4: outptr
[3] = (c
& 0x3f) | 0x80; c
>>= 6;
554 case 3: outptr
[2] = (c
& 0x3f) | 0x80; c
>>= 6;
555 case 2: outptr
[1] = (c
& 0x3f) | 0x80; c
>>= 6;
556 case 1: outptr
[0] = c
| base
;
559 for (i
= n
- 1; i
> 0; i
--) {
560 outptr
[i
] = (c
& 0x3f) | 0x80;
564 outptr
[0] = c
| base
;
/* Latin-1 decoder: the input byte, read as unsigned char, is stored
 * unchanged as the output character. inleft is not consulted in the visible
 * lines; the return statement is not visible in this excerpt. */
571 decode_latin1 (char *inbuf
, size_t inleft
, gunichar
*outchar
)
573 *outchar
= (unsigned char) *inbuf
;
/* Latin-1 encoder. Two failure paths are visible: one sets errno to E2BIG
 * and one sets errno to EILSEQ. Their guarding conditions and the actual
 * store are not visible in this excerpt (presumably "no output space" and
 * "c does not fit in one byte" respectively -- confirm). */
578 encode_latin1 (gunichar c
, char *outbuf
, size_t outleft
)
581 mono_set_errno (E2BIG
);
586 mono_set_errno (EILSEQ
);
597 * Simple conversion API
/* error_quark holds a pointer to the string literal "ConvertError".
 * g_convert_error_quark takes no arguments; its body is not visible in this
 * excerpt (presumably it returns error_quark -- confirm). */
600 static gpointer error_quark
= (gpointer
)"ConvertError";
603 g_convert_error_quark (void)
/* Converts len bytes of str (or strlen (str) bytes when len < 0) from
 * from_charset to to_charset into a newly allocated buffer.
 *
 * str, to_charset and from_charset must be non-NULL (g_return_val_if_fail
 * yields NULL otherwise). If no converter can be opened, err is set to
 * G_CONVERT_ERROR_NO_CONVERSION.
 *
 * Visible behaviour of the conversion loop:
 *  - the output buffer starts at MAX (inleft, 8) bytes plus 4 spare bytes and
 *    is grown with g_realloc when g_iconv reports failure for lack of space;
 *  - incomplete input stops the conversion;
 *  - an illegal sequence sets G_CONVERT_ERROR_ILLEGAL_SEQUENCE and records
 *    the offending offset in *bytes_read;
 *  - any other failure sets G_CONVERT_ERROR_FAILED;
 *  - once the input is consumed, g_iconv is called with NULL input to flush.
 *
 * The result is terminated with four zero bytes. *bytes_written receives the
 * output length without that terminator and *bytes_read the number of input
 * bytes consumed. The NULL checks on those two pointers and the errno switch
 * are not visible in this excerpt. */
609 g_convert (const gchar
*str
, gssize len
, const gchar
*to_charset
, const gchar
*from_charset
,
610 gsize
*bytes_read
, gsize
*bytes_written
, GError
**err
)
612 gsize outsize
, outused
, outleft
, inleft
, grow
, rc
;
613 char *result
, *outbuf
, *inbuf
;
614 gboolean flush
= FALSE
;
615 gboolean done
= FALSE
;
618 g_return_val_if_fail (str
!= NULL
, NULL
);
619 g_return_val_if_fail (to_charset
!= NULL
, NULL
);
620 g_return_val_if_fail (from_charset
!= NULL
, NULL
);
622 if ((cd
= g_iconv_open (to_charset
, from_charset
)) == (GIConv
) -1) {
623 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_NO_CONVERSION
,
624 "Conversion from %s to %s not supported.",
625 from_charset
, to_charset
);
/* a negative len means str is NUL-terminated */
636 inleft
= len
< 0 ? strlen (str
) : len
;
637 inbuf
= (char *) str
;
/* the 4 extra bytes leave room for the terminator written at the end */
639 outleft
= outsize
= MAX (inleft
, 8);
640 outbuf
= result
= g_malloc (outsize
+ 4);
/* convert pending input ... */
644 rc
= g_iconv (cd
, &inbuf
, &inleft
, &outbuf
, &outleft
);
/* ... or, with NULL input, flush the converter */
646 rc
= g_iconv (cd
, NULL
, NULL
, &outbuf
, &outleft
);
648 if (rc
== (gsize
) -1) {
651 /* grow our result buffer */
652 grow
= MAX (inleft
, 8) << 1;
/* remember the write offset: g_realloc may move the buffer */
653 outused
= outbuf
- result
;
656 result
= g_realloc (result
, outsize
+ 4);
657 outbuf
= result
+ outused
;
660 /* incomplete input, stop converting and terminate here */
667 /* illegal sequence in the input */
668 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
, "%s", g_strerror (errno
));
671 /* save offset of the illegal input sequence */
672 *bytes_read
= (inbuf
- str
);
683 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_FAILED
, "%s", g_strerror (errno
));
696 /* input has been converted and output has been flushed */
699 /* input has been converted, need to flush the output */
706 /* Note: not all charsets can be null-terminated with a single
707 null byte. UCS2, for example, needs 2 null bytes and UCS4
708 needs 4. I hope that 4 null bytes is enough to terminate all
709 multibyte charsets? */
711 /* null-terminate the result */
712 memset (outbuf
, 0, 4);
715 *bytes_written
= outbuf
- result
;
718 *bytes_read
= inbuf
- str
;
729 * An explanation of the conversion can be found at:
730 * http://home.tiscali.nl/t876506/utf8tbl.html
/* Encodes c as UTF-8 into outbuf.
 *
 * The sequence length is chosen by a chain of thresholds (0x800, 0x10000,
 * 0x200000, 0x4000000, 0x80000000). Bytes are stored only when outbuf is
 * non-NULL, so callers may pass NULL to measure: callers in this file add the
 * return value to a running output length and treat a negative return as an
 * invalid code point. The return statements are not visible in this
 * excerpt. */
734 g_unichar_to_utf8 (gunichar c
, gchar
*outbuf
)
741 } else if (c
< 0x800) {
744 } else if (c
< 0x10000) {
747 } else if (c
< 0x200000) {
750 } else if (c
< 0x4000000) {
753 } else if (c
< 0x80000000) {
760 if (outbuf
!= NULL
) {
/* fill trailing bytes from the last one backwards */
761 for (i
= n
- 1; i
> 0; i
--) {
762 /* mask off 6 bits worth and add 128 */
763 outbuf
[i
] = (c
& 0x3f) | 0x80;
767 /* first character has a different base */
768 outbuf
[0] = c
| base
;
/* Encodes c as UTF-16 into outbuf (host-order gunichar2 units).
 *
 * Visible behaviour: values in the first range and in 0xe000..0xffff are
 * stored as a single unit; values below 0x110000 and at least 0x10000 are
 * stored as a surrogate pair derived from c2 (assignment not visible in this
 * excerpt; presumably c - 0x10000). The branch for values below 0xe000 and
 * the final branch have no visible body.
 *
 * Callers in this file pass outbuf == NULL to measure and treat a negative
 * return as an invalid code point; the NULL guards around the stores and the
 * return statements are not visible here -- confirm. */
774 static FORCE_INLINE (int)
775 g_unichar_to_utf16 (gunichar c
, gunichar2
*outbuf
)
781 *outbuf
= (gunichar2
) c
;
784 } else if (c
< 0xe000) {
786 } else if (c
< 0x10000) {
788 *outbuf
= (gunichar2
) c
;
791 } else if (c
< 0x110000) {
795 outbuf
[0] = (gunichar2
) ((c2
>> 10) + 0xd800);
796 outbuf
[1] = (gunichar2
) ((c2
& 0x3ff) + 0xdc00);
/* Converts UTF-8 text to a newly allocated array of gunichar.
 *
 * str must be non-NULL (g_return_val_if_fail yields NULL otherwise). The
 * character count n comes from g_utf8_strlen (str, len); n + 1 elements are
 * allocated and each character is fetched with g_utf8_get_char while
 * stepping with g_utf8_next_char. There is no GError parameter.
 * The terminator store, the items_written store and the return statement are
 * not visible in this excerpt. */
806 g_utf8_to_ucs4_fast (const gchar
*str
, glong len
, glong
*items_written
)
808 gunichar
*outbuf
, *outptr
;
812 g_return_val_if_fail (str
!= NULL
, NULL
);
814 n
= g_utf8_strlen (str
, len
);
/* one extra element beyond the n characters */
819 outptr
= outbuf
= g_malloc ((n
+ 1) * sizeof (gunichar
));
820 inptr
= (char *) str
;
822 for (i
= 0; i
< n
; i
++) {
823 *outptr
++ = g_utf8_get_char (inptr
);
824 inptr
= g_utf8_next_char (inptr
);
/* Shared UTF-8 -> UTF-16 conversion used by the public wrappers below.
 *
 * include_nuls: when FALSE, a decoded 0 character ends the scan.
 * replace_invalid_codepoints: when TRUE, a code point that
 *   g_unichar_to_utf16 rejects is handled by a replacement path (not visible
 *   in this excerpt) instead of failing with EILSEQ.
 *
 * The function makes two passes over the input. The first decodes with
 * decode_utf8 and measures with g_unichar_to_utf16 (c, NULL); it then stores
 * *items_read / *items_written and allocates outlen + 1 units. The second
 * decodes again and writes the units.
 *
 * Errors (err): G_CONVERT_ERROR_FAILED with the "embedded nulls" message
 * (condition not visible here; presumably include_nuls with a negative len),
 * G_CONVERT_ERROR_ILLEGAL_SEQUENCE when errno is EILSEQ, and
 * G_CONVERT_ERROR_PARTIAL_INPUT when the input is truncated and the caller
 * passed no items_read. On the error path *items_read is set to the byte
 * offset reached. */
833 eg_utf8_to_utf16_general (const gchar
*str
, glong len
, glong
*items_read
, glong
*items_written
, gboolean include_nuls
, gboolean replace_invalid_codepoints
, GError
**err
)
835 gunichar2
*outbuf
, *outptr
;
842 g_return_val_if_fail (str
!= NULL
, NULL
);
846 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_FAILED
, "Conversions with embedded nulls must pass the string length");
/* pass 1: validate and measure */
853 inptr
= (char *) str
;
857 if ((n
= decode_utf8 (inptr
, inleft
, &c
)) < 0)
860 if (c
== 0 && !include_nuls
)
863 if ((u
= g_unichar_to_utf16 (c
, NULL
)) < 0) {
864 if (replace_invalid_codepoints
) {
867 mono_set_errno (EILSEQ
);
878 *items_read
= inptr
- str
;
881 *items_written
= outlen
;
/* pass 2: allocate (one extra unit) and write */
883 outptr
= outbuf
= g_malloc ((outlen
+ 1) * sizeof (gunichar2
));
884 inptr
= (char *) str
;
888 if ((n
= decode_utf8 (inptr
, inleft
, &c
)) < 0)
891 if (c
== 0 && !include_nuls
)
894 u
= g_unichar_to_utf16 (c
, outptr
);
895 if ((u
< 0) && replace_invalid_codepoints
) {
/* error reporting */
911 if (errno
== EILSEQ
) {
912 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
913 "Illegal byte sequence encounted in the input.");
914 } else if (items_read
) {
915 /* partial input is ok if we can let our caller know... */
917 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_PARTIAL_INPUT
,
918 "Partial byte sequence encountered in the input.");
922 *items_read
= inptr
- str
;
/* UTF-8 -> UTF-16 conversion: forwards to eg_utf8_to_utf16_general with
 * include_nuls = FALSE and replace_invalid_codepoints = FALSE. */
931 g_utf8_to_utf16 (const gchar
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
933 return eg_utf8_to_utf16_general (str
, len
, items_read
, items_written
, FALSE
, FALSE
, err
);
/* UTF-8 -> UTF-16 conversion that keeps embedded NUL characters: forwards to
 * eg_utf8_to_utf16_general with include_nuls = TRUE and
 * replace_invalid_codepoints = FALSE. */
937 eg_utf8_to_utf16_with_nuls (const gchar
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
939 return eg_utf8_to_utf16_general (str
, len
, items_read
, items_written
, TRUE
, FALSE
, err
);
/* Lenient UTF-8 -> UTF-16 conversion: forwards to eg_utf8_to_utf16_general
 * with include_nuls = TRUE and replace_invalid_codepoints = TRUE. */
943 eg_wtf8_to_utf16 (const gchar
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
945 return eg_utf8_to_utf16_general (str
, len
, items_read
, items_written
, TRUE
, TRUE
, err
);
/* Converts UTF-8 text to a newly allocated UCS-4 (gunichar) array.
 *
 * str must be non-NULL (g_return_val_if_fail yields NULL otherwise).
 * The function makes two passes. The first decodes with decode_utf8 to
 * validate and measure; outlen is counted in bytes, hence
 * *items_written = outlen / 4 and the outlen + 4 allocation (room for one
 * extra element). The second decodes again to fill the buffer.
 *
 * Errors (err): G_CONVERT_ERROR_ILLEGAL_SEQUENCE when errno is EILSEQ, and
 * G_CONVERT_ERROR_PARTIAL_INPUT when the input is truncated and the caller
 * passed no items_read. *items_read receives the byte offset reached. */
949 g_utf8_to_ucs4 (const gchar
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
951 gunichar
*outbuf
, *outptr
;
958 g_return_val_if_fail (str
!= NULL
, NULL
);
/* pass 1: validate and measure */
963 inptr
= (char *) str
;
967 if ((n
= decode_utf8 (inptr
, inleft
, &c
)) < 0) {
968 if (errno
== EILSEQ
) {
969 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
970 "Illegal byte sequence encounted in the input.");
971 } else if (items_read
) {
972 /* partial input is ok if we can let our caller know... */
975 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_PARTIAL_INPUT
,
976 "Partial byte sequence encountered in the input.");
980 *items_read
= inptr
- str
;
995 *items_written
= outlen
/ 4;
998 *items_read
= inptr
- str
;
/* pass 2: allocate and fill */
1000 outptr
= outbuf
= g_malloc (outlen
+ 4);
1001 inptr
= (char *) str
;
1004 while (inleft
> 0) {
1005 if ((n
= decode_utf8 (inptr
, inleft
, &c
)) < 0)
/* Converts host-byte-order UTF-16 to a newly allocated UTF-8 string.
 *
 * str must be non-NULL (g_return_val_if_fail yields NULL otherwise).
 * The function makes two passes. The first decodes with decode_utf16 and
 * sums g_unichar_to_utf8 (c, NULL) into outlen; the second allocates
 * outlen + 1 bytes and writes the encoded characters.
 *
 * Errors (err): G_CONVERT_ERROR_ILLEGAL_SEQUENCE when errno is EILSEQ, and
 * G_CONVERT_ERROR_PARTIAL_INPUT when the input is truncated and the caller
 * passed no items_read. *items_read is reported in 16-bit units (byte offset
 * divided by 2) and *items_written in bytes. */
1021 g_utf16_to_utf8 (const gunichar2
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
1023 char *inptr
, *outbuf
, *outptr
;
1029 g_return_val_if_fail (str
!= NULL
, NULL
);
/* pass 1: validate and measure */
1037 inptr
= (char *) str
;
1040 while (inleft
> 0) {
1041 if ((n
= decode_utf16 (inptr
, inleft
, &c
)) < 0) {
1042 if (n
== -2 && inleft
> 2) {
1043 /* This means that the first UTF-16 char was read, but second failed */
1048 if (errno
== EILSEQ
) {
1049 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
1050 "Illegal byte sequence encounted in the input.");
1051 } else if (items_read
) {
1052 /* partial input is ok if we can let our caller know... */
1055 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_PARTIAL_INPUT
,
1056 "Partial byte sequence encountered in the input.");
1060 *items_read
= (inptr
- (char *) str
) / 2;
1069 outlen
+= g_unichar_to_utf8 (c
, NULL
);
1075 *items_read
= (inptr
- (char *) str
) / 2;
1078 *items_written
= outlen
;
/* pass 2: allocate (one extra byte) and write */
1080 outptr
= outbuf
= g_malloc (outlen
+ 1);
1081 inptr
= (char *) str
;
1084 while (inleft
> 0) {
1085 if ((n
= decode_utf16 (inptr
, inleft
, &c
)) < 0)
1090 outptr
+= g_unichar_to_utf8 (c
, outptr
);
/* Converts host-byte-order UTF-16 to a newly allocated UCS-4 (gunichar)
 * array.
 *
 * str must be non-NULL (g_return_val_if_fail yields NULL otherwise).
 * The function makes two passes with decode_utf16. The first validates and
 * measures; outlen is counted in bytes, hence *items_written = outlen / 4
 * and the outlen + 4 allocation (room for one extra element). The second
 * fills the buffer.
 *
 * Errors (err): G_CONVERT_ERROR_ILLEGAL_SEQUENCE when errno is EILSEQ, and
 * G_CONVERT_ERROR_PARTIAL_INPUT when the input is truncated and the caller
 * passed no items_read. *items_read is reported in 16-bit units. */
1101 g_utf16_to_ucs4 (const gunichar2
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
1103 gunichar
*outbuf
, *outptr
;
1110 g_return_val_if_fail (str
!= NULL
, NULL
);
/* pass 1: validate and measure */
1118 inptr
= (char *) str
;
1121 while (inleft
> 0) {
1122 if ((n
= decode_utf16 (inptr
, inleft
, &c
)) < 0) {
1123 if (n
== -2 && inleft
> 2) {
1124 /* This means that the first UTF-16 char was read, but second failed */
1129 if (errno
== EILSEQ
) {
1130 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
1131 "Illegal byte sequence encounted in the input.");
1132 } else if (items_read
) {
1133 /* partial input is ok if we can let our caller know... */
1136 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_PARTIAL_INPUT
,
1137 "Partial byte sequence encountered in the input.");
1141 *items_read
= (inptr
- (char *) str
) / 2;
1156 *items_read
= (inptr
- (char *) str
) / 2;
1159 *items_written
= outlen
/ 4;
/* pass 2: allocate and fill */
1161 outptr
= outbuf
= g_malloc (outlen
+ 4);
1162 inptr
= (char *) str
;
1165 while (inleft
> 0) {
1166 if ((n
= decode_utf16 (inptr
, inleft
, &c
)) < 0)
/* Converts a UCS-4 (gunichar) array to a newly allocated UTF-8 string.
 *
 * str must be non-NULL (g_return_val_if_fail yields NULL otherwise).
 * Two measuring loops are visible: one that runs to the first 0 element and
 * one that additionally stops after len elements. The condition choosing
 * between them is not visible in this excerpt (presumably len < 0 selects
 * the first). Any element that g_unichar_to_utf8 rejects (negative return)
 * sets G_CONVERT_ERROR_ILLEGAL_SEQUENCE. The output buffer is outlen + 1
 * bytes, and *items_written receives outlen.
 *
 * NOTE(review): the write loop iterates i < len; it relies on len having
 * been set to the measured element count by lines not visible here --
 * confirm. */
1182 g_ucs4_to_utf8 (const gunichar
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
1184 char *outbuf
, *outptr
;
1189 g_return_val_if_fail (str
!= NULL
, NULL
);
/* measuring loop: zero-terminated input */
1192 for (i
= 0; str
[i
] != 0; i
++) {
1193 if ((n
= g_unichar_to_utf8 (str
[i
], NULL
)) < 0) {
1194 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
1195 "Illegal byte sequence encounted in the input.");
/* measuring loop: bounded by len, still stopping at a 0 element */
1209 for (i
= 0; i
< len
&& str
[i
] != 0; i
++) {
1210 if ((n
= g_unichar_to_utf8 (str
[i
], NULL
)) < 0) {
1211 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
1212 "Illegal byte sequence encounted in the input.");
/* allocate (one extra byte) and encode */
1229 outptr
= outbuf
= g_malloc (outlen
+ 1);
1230 for (i
= 0; i
< len
; i
++)
1231 outptr
+= g_unichar_to_utf8 (str
[i
], outptr
);
1235 *items_written
= outlen
;
/* Converts a UCS-4 (gunichar) array to a newly allocated UTF-16 array.
 *
 * str must be non-NULL (g_return_val_if_fail yields NULL otherwise).
 * Two measuring loops are visible: one that runs to the first 0 element and
 * one that additionally stops after len elements. The condition choosing
 * between them is not visible in this excerpt (presumably len < 0 selects
 * the first). Any element that g_unichar_to_utf16 rejects (negative return)
 * sets G_CONVERT_ERROR_ILLEGAL_SEQUENCE. The output buffer holds outlen + 1
 * gunichar2 units, and *items_written receives outlen.
 *
 * NOTE(review): the write loop iterates i < len; it relies on len having
 * been set to the measured element count by lines not visible here --
 * confirm. */
1244 g_ucs4_to_utf16 (const gunichar
*str
, glong len
, glong
*items_read
, glong
*items_written
, GError
**err
)
1246 gunichar2
*outbuf
, *outptr
;
1251 g_return_val_if_fail (str
!= NULL
, NULL
);
/* measuring loop: zero-terminated input */
1254 for (i
= 0; str
[i
] != 0; i
++) {
1255 if ((n
= g_unichar_to_utf16 (str
[i
], NULL
)) < 0) {
1256 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
1257 "Illegal byte sequence encounted in the input.");
/* measuring loop: bounded by len, still stopping at a 0 element */
1271 for (i
= 0; i
< len
&& str
[i
] != 0; i
++) {
1272 if ((n
= g_unichar_to_utf16 (str
[i
], NULL
)) < 0) {
1273 g_set_error (err
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
1274 "Illegal byte sequence encounted in the input.");
/* allocate (one extra unit) and encode */
1291 outptr
= outbuf
= g_malloc ((outlen
+ 1) * sizeof (gunichar2
));
1292 for (i
= 0; i
< len
; i
++)
1293 outptr
+= g_unichar_to_utf16 (str
[i
], outptr
);
1297 *items_written
= outlen
;