* TextBoxTest: More of these tests work now.
[mono-project.git] / mono / utils / strenc.c
blobc37bb3d20168ffb6c93473b33ce7558b4da2ffb7
1 /*
2 * strenc.c: string encoding conversions
4 * Author:
5 * Dick Porter (dick@ximian.com)
7 * (C) 2003 Ximian, Inc.
8 */
10 #include <config.h>
11 #include <glib.h>
12 #include <string.h>
14 #include "strenc.h"
16 #undef DEBUG
18 /**
19 * mono_unicode_from_external:
20 * @in: pointers to the buffer.
21 * @bytes: number of bytes in the string.
23 * Tries to turn a NULL-terminated string into UTF16.
25 * First, see if it's valid UTF8, in which case just turn it directly
26 * into UTF16. Next, run through the colon-separated encodings in
27 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
28 * returning the first successful conversion to UTF16. If no
29 * conversion succeeds, return NULL.
31 * Callers must free the returned string if not NULL. bytes holds the number
32 * of bytes in the returned string, not including the terminator.
34 gunichar2 *
35 mono_unicode_from_external (const gchar *in, gsize *bytes)
37 gchar *res=NULL;
38 gchar **encodings;
39 const gchar *encoding_list;
40 int i;
41 glong lbytes;
43 if(in==NULL) {
44 return(NULL);
47 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
48 if(encoding_list==NULL) {
49 encoding_list = "";
52 encodings=g_strsplit (encoding_list, ":", 0);
53 for(i=0;encodings[i]!=NULL; i++) {
54 #ifdef DEBUG
55 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
56 encodings[i]);
57 #endif
58 /* "default_locale" is a special case encoding */
59 if(!strcmp (encodings[i], "default_locale")) {
60 gchar *utf8=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
61 if(utf8!=NULL) {
62 res=(gchar *) g_utf8_to_utf16 (utf8, -1, NULL, &lbytes, NULL);
63 *bytes = (gsize) lbytes;
65 g_free (utf8);
66 } else {
67 /* Don't use UTF16 here. It returns the <FF FE> prepended to the string */
68 res = g_convert (in, strlen (in), "UTF8", encodings[i], NULL, bytes, NULL);
69 if (res != NULL) {
70 gchar *ptr = res;
71 res = (gchar *) g_utf8_to_utf16 (res, -1, NULL, &lbytes, NULL);
72 *bytes = (gsize) lbytes;
73 g_free (ptr);
77 if(res!=NULL) {
78 g_strfreev (encodings);
79 *bytes *= 2;
80 return((gunichar2 *)res);
84 g_strfreev (encodings);
86 if(g_utf8_validate (in, -1, NULL)) {
87 gunichar2 *unires=g_utf8_to_utf16 (in, -1, NULL, (glong *)bytes, NULL);
88 *bytes *= 2;
89 return(unires);
92 return(NULL);
95 /**
96 * mono_utf8_from_external:
97 * @in: pointer to the string buffer.
99 * Tries to turn a NULL-terminated string into UTF8.
101 * First, see if it's valid UTF8, in which case there's nothing more
102 * to be done. Next, run through the colon-separated encodings in
103 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
104 * returning the first successful conversion to utf8. If no
105 * conversion succeeds, return NULL.
107 * Callers must free the returned string if not NULL.
109 * This function is identical to mono_unicode_from_external, apart
110 * from returning utf8 not utf16; it's handy in a few places to work
111 * in utf8.
113 gchar *mono_utf8_from_external (const gchar *in)
115 gchar *res=NULL;
116 gchar **encodings;
117 const gchar *encoding_list;
118 int i;
120 if(in==NULL) {
121 return(NULL);
124 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
125 if(encoding_list==NULL) {
126 encoding_list = "";
129 encodings=g_strsplit (encoding_list, ":", 0);
130 for(i=0;encodings[i]!=NULL; i++) {
131 #ifdef DEBUG
132 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
133 encodings[i]);
134 #endif
136 /* "default_locale" is a special case encoding */
137 if(!strcmp (encodings[i], "default_locale")) {
138 res=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
139 if(res!=NULL && !g_utf8_validate (res, -1, NULL)) {
140 g_free (res);
141 res=NULL;
143 } else {
144 res=g_convert (in, -1, "UTF8", encodings[i], NULL,
145 NULL, NULL);
148 if(res!=NULL) {
149 g_strfreev (encodings);
150 return(res);
154 g_strfreev (encodings);
156 if(g_utf8_validate (in, -1, NULL)) {
157 return(g_strdup (in));
160 return(NULL);
164 * mono_unicode_to_external:
165 * @uni: an UTF16 string to conver to an external representation.
167 * Turns NULL-terminated UTF16 into either UTF8, or the first
168 * working item in MONO_EXTERNAL_ENCODINGS if set. If no conversions
169 * work, then UTF8 is returned.
171 * Callers must free the returned string.
173 gchar *mono_unicode_to_external (const gunichar2 *uni)
175 gchar *utf8;
176 const gchar *encoding_list;
178 /* Turn the unicode into utf8 to start with, because its
179 * easier to work with gchar * than gunichar2 *
181 utf8=g_utf16_to_utf8 (uni, -1, NULL, NULL, NULL);
182 g_assert (utf8!=NULL);
184 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
185 if(encoding_list==NULL) {
186 /* Do UTF8 */
187 return(utf8);
188 } else {
189 gchar *res, **encodings;
190 int i;
192 encodings=g_strsplit (encoding_list, ":", 0);
193 for(i=0; encodings[i]!=NULL; i++) {
194 if(!strcmp (encodings[i], "default_locale")) {
195 res=g_locale_from_utf8 (utf8, -1, NULL, NULL,
196 NULL);
197 } else {
198 res=g_convert (utf8, -1, encodings[i], "UTF8",
199 NULL, NULL, NULL);
202 if(res!=NULL) {
203 g_free (utf8);
204 g_strfreev (encodings);
206 return(res);
210 g_strfreev (encodings);
213 /* Nothing else worked, so just return the utf8 */
214 return(utf8);