2005-01-02 Ben Maurer <bmaurer@ximian.com>
[mono-project.git] / mono / utils / strenc.c
blobb2a5d7e46aa5d0b056848fd9e4626ca52ced58bc
1 /*
2 * strenc.c: string encoding conversions
4 * Author:
5 * Dick Porter (dick@ximian.com)
7 * (C) 2003 Ximian, Inc.
8 */
10 #include <config.h>
11 #include <glib.h>
12 #include <string.h>
14 #include "strenc.h"
16 #undef DEBUG
18 /* Tries to turn a NULL-terminated string into UTF16.
20 * First, see if it's valid UTF8, in which case just turn it directly
21 * into UTF16. Next, run through the colon-separated encodings in
22 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
23 * returning the first successful conversion to UTF16. If no
24 * conversion succeeds, return NULL.
26 * Callers must free the returned string if not NULL. bytes holds the number
27 * of bytes in the returned string, not including the terminator.
29 gunichar2 *mono_unicode_from_external (const gchar *in, gsize *bytes)
31 gchar *res=NULL;
32 gchar **encodings;
33 const gchar *encoding_list;
34 int i;
35 glong lbytes;
37 if(in==NULL) {
38 return(NULL);
41 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
42 if(encoding_list==NULL) {
43 encoding_list = "";
46 encodings=g_strsplit (encoding_list, ":", 0);
47 for(i=0;encodings[i]!=NULL; i++) {
48 #ifdef DEBUG
49 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
50 encodings[i]);
51 #endif
52 /* "default_locale" is a special case encoding */
53 if(!strcmp (encodings[i], "default_locale")) {
54 gchar *utf8=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
55 if(utf8!=NULL) {
56 res=(gchar *) g_utf8_to_utf16 (utf8, -1, NULL, &lbytes, NULL);
57 *bytes = (gsize) lbytes;
59 g_free (utf8);
60 } else {
61 res=g_convert (in, -1, "UTF16", encodings[i], NULL, bytes, NULL);
64 if(res!=NULL) {
65 g_strfreev (encodings);
66 *bytes *= 2;
67 return((gunichar2 *)res);
71 g_strfreev (encodings);
73 if(g_utf8_validate (in, -1, NULL)) {
74 gunichar2 *unires=g_utf8_to_utf16 (in, -1, NULL, (glong *)bytes, NULL);
75 *bytes *= 2;
76 return(unires);
79 return(NULL);
82 /* Tries to turn a NULL-terminated string into UTF8.
84 * First, see if it's valid UTF8, in which case there's nothing more
85 * to be done. Next, run through the colon-separated encodings in
86 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
87 * returning the first successful conversion to utf8. If no
88 * conversion succeeds, return NULL.
90 * Callers must free the returned string if not NULL.
92 * This function is identical to mono_unicode_from_external, apart
93 * from returning utf8 not utf16; it's handy in a few places to work
94 * in utf8.
96 gchar *mono_utf8_from_external (const gchar *in)
98 gchar *res=NULL;
99 gchar **encodings;
100 const gchar *encoding_list;
101 int i;
103 if(in==NULL) {
104 return(NULL);
107 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
108 if(encoding_list==NULL) {
109 encoding_list = "";
112 encodings=g_strsplit (encoding_list, ":", 0);
113 for(i=0;encodings[i]!=NULL; i++) {
114 #ifdef DEBUG
115 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
116 encodings[i]);
117 #endif
119 /* "default_locale" is a special case encoding */
120 if(!strcmp (encodings[i], "default_locale")) {
121 res=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
122 if(res!=NULL && !g_utf8_validate (res, -1, NULL)) {
123 g_free (res);
124 res=NULL;
126 } else {
127 res=g_convert (in, -1, "UTF8", encodings[i], NULL,
128 NULL, NULL);
131 if(res!=NULL) {
132 g_strfreev (encodings);
133 return(res);
137 g_strfreev (encodings);
139 if(g_utf8_validate (in, -1, NULL)) {
140 return(g_strdup (in));
143 return(NULL);
146 /* Turns NULL-terminated UTF16 into either UTF8, or the first
147 * working item in MONO_EXTERNAL_ENCODINGS if set. If no conversions
148 * work, then UTF8 is returned.
150 * Callers must free the returned string.
152 gchar *mono_unicode_to_external (const gunichar2 *uni)
154 gchar *utf8;
155 const gchar *encoding_list;
157 /* Turn the unicode into utf8 to start with, because its
158 * easier to work with gchar * than gunichar2 *
160 utf8=g_utf16_to_utf8 (uni, -1, NULL, NULL, NULL);
161 g_assert (utf8!=NULL);
163 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
164 if(encoding_list==NULL) {
165 /* Do UTF8 */
166 return(utf8);
167 } else {
168 gchar *res, **encodings;
169 int i;
171 encodings=g_strsplit (encoding_list, ":", 0);
172 for(i=0; encodings[i]!=NULL; i++) {
173 if(!strcmp (encodings[i], "default_locale")) {
174 res=g_locale_from_utf8 (utf8, -1, NULL, NULL,
175 NULL);
176 } else {
177 res=g_convert (utf8, -1, encodings[i], "UTF8",
178 NULL, NULL, NULL);
181 if(res!=NULL) {
182 g_free (utf8);
183 g_strfreev (encodings);
185 return(res);
189 g_strfreev (encodings);
192 /* Nothing else worked, so just return the utf8 */
193 return(utf8);