2 * strenc.c: string encoding conversions
5 * Dick Porter (dick@ximian.com)
7 * (C) 2003 Ximian, Inc.
19 * mono_unicode_from_external:
20 * @in: pointer to the buffer.
21 * @bytes: out parameter; set to the number of bytes in the returned string.
23 * Tries to turn a NULL-terminated string into UTF16.
25 * First, run through the colon-separated encodings in
26 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
27 * returning the first successful conversion to UTF16. If none of
28 * them succeeds, see if the input is valid UTF8, in which case just
29 * turn it directly into UTF16. Otherwise, return NULL.
31 * Callers must free the returned string if not NULL. bytes holds the number
32 * of bytes in the returned string, not including the terminator.
35 mono_unicode_from_external (const gchar
*in
, gsize
*bytes
)
39 const gchar
*encoding_list
;
47 encoding_list
=g_getenv ("MONO_EXTERNAL_ENCODINGS");
48 if(encoding_list
==NULL
) {
52 encodings
=g_strsplit (encoding_list
, ":", 0);
53 for(i
=0;encodings
[i
]!=NULL
; i
++) {
55 g_message (G_GNUC_PRETTY_FUNCTION
": Trying encoding [%s]",
58 /* "default_locale" is a special case encoding */
59 if(!strcmp (encodings
[i
], "default_locale")) {
60 gchar
*utf8
=g_locale_to_utf8 (in
, -1, NULL
, NULL
, NULL
);
62 res
=(gchar
*) g_utf8_to_utf16 (utf8
, -1, NULL
, &lbytes
, NULL
);
63 *bytes
= (gsize
) lbytes
;
67 /* Don't use UTF16 here. It returns the <FF FE> prepended to the string */
68 res
= g_convert (in
, strlen (in
), "UTF8", encodings
[i
], NULL
, bytes
, NULL
);
71 res
= (gchar
*) g_utf8_to_utf16 (res
, -1, NULL
, &lbytes
, NULL
);
72 *bytes
= (gsize
) lbytes
;
78 g_strfreev (encodings
);
80 return((gunichar2
*)res
);
84 g_strfreev (encodings
);
86 if(g_utf8_validate (in
, -1, NULL
)) {
87 gunichar2
*unires
=g_utf8_to_utf16 (in
, -1, NULL
, (glong
*)bytes
, NULL
);
96 * mono_utf8_from_external:
97 * @in: pointer to the string buffer.
99 * Tries to turn a NULL-terminated string into UTF8.
101 * First, run through the colon-separated encodings in
102 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
103 * returning the first successful conversion to utf8. If none of
104 * them succeeds, see if the input is already valid UTF8, in which
105 * case a copy of it is returned. Otherwise, return NULL.
107 * Callers must free the returned string if not NULL.
109 * This function is identical to mono_unicode_from_external, apart
110 * from returning utf8 not utf16; it's handy in a few places to work
113 gchar
*mono_utf8_from_external (const gchar
*in
)
117 const gchar
*encoding_list
;
124 encoding_list
=g_getenv ("MONO_EXTERNAL_ENCODINGS");
125 if(encoding_list
==NULL
) {
129 encodings
=g_strsplit (encoding_list
, ":", 0);
130 for(i
=0;encodings
[i
]!=NULL
; i
++) {
132 g_message (G_GNUC_PRETTY_FUNCTION
": Trying encoding [%s]",
136 /* "default_locale" is a special case encoding */
137 if(!strcmp (encodings
[i
], "default_locale")) {
138 res
=g_locale_to_utf8 (in
, -1, NULL
, NULL
, NULL
);
139 if(res
!=NULL
&& !g_utf8_validate (res
, -1, NULL
)) {
144 res
=g_convert (in
, -1, "UTF8", encodings
[i
], NULL
,
149 g_strfreev (encodings
);
154 g_strfreev (encodings
);
156 if(g_utf8_validate (in
, -1, NULL
)) {
157 return(g_strdup (in
));
164 * mono_unicode_to_external:
165 * @uni: a UTF16 string to convert to an external representation.
167 * Turns NULL-terminated UTF16 into either UTF8, or the first
168 * working item in MONO_EXTERNAL_ENCODINGS if set. If no conversions
169 * work, then UTF8 is returned.
171 * Callers must free the returned string.
173 gchar
*mono_unicode_to_external (const gunichar2
*uni
)
176 const gchar
*encoding_list
;
178 /* Turn the unicode into utf8 to start with, because it's
179 * easier to work with gchar * than gunichar2 *
181 utf8
=g_utf16_to_utf8 (uni
, -1, NULL
, NULL
, NULL
);
182 g_assert (utf8
!=NULL
);
184 encoding_list
=g_getenv ("MONO_EXTERNAL_ENCODINGS");
185 if(encoding_list
==NULL
) {
189 gchar
*res
, **encodings
;
192 encodings
=g_strsplit (encoding_list
, ":", 0);
193 for(i
=0; encodings
[i
]!=NULL
; i
++) {
194 if(!strcmp (encodings
[i
], "default_locale")) {
195 res
=g_locale_from_utf8 (utf8
, -1, NULL
, NULL
,
198 res
=g_convert (utf8
, -1, encodings
[i
], "UTF8",
204 g_strfreev (encodings
);
210 g_strfreev (encodings
);
213 /* Nothing else worked, so just return the utf8 */