Support VERSION_REVTYPE git builds on cleanup_checkout.sh
[freeciv.git] / utility / fciconv.c
blob8d4c0585297036977b108615c148c9a8d2fff1e4
1 /**********************************************************************
2 Freeciv - Copyright (C) 2003-2004 - The Freeciv Project
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2, or (at your option)
6 any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12 ***********************************************************************/
14 #ifdef HAVE_CONFIG_H
15 #include <fc_config.h>
16 #endif
18 #include <errno.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <string.h>
23 #ifdef HAVE_ICONV
24 #include <iconv.h>
25 #endif
27 #ifdef HAVE_LANGINFO_CODESET
28 #include <langinfo.h>
29 #endif
31 #ifdef HAVE_LIBCHARSET
32 #include <libcharset.h>
33 #endif
35 /* utility */
36 #include "fciconv.h"
37 #include "fcintl.h"
38 #include "log.h"
39 #include "mem.h"
40 #include "support.h"
42 static bool is_init = FALSE;
43 static char convert_buffer[4096];
44 static const char *transliteration_string;
46 #ifdef HAVE_ICONV
47 static const char *local_encoding, *data_encoding, *internal_encoding;
48 #else /* HAVE_ICONV */
49 /* Hack to confuse the compiler into working. */
50 # define local_encoding get_local_encoding()
51 # define data_encoding get_local_encoding()
52 # define internal_encoding get_local_encoding()
53 #endif /* HAVE_ICONV */
55 /***************************************************************************
56 Must be called during the initialization phase of server and client to
57 initialize the character encodings to be used.
59 Pass an internal encoding of NULL to use the local encoding internally.
60 ***************************************************************************/
61 void init_character_encodings(const char *my_internal_encoding,
62 bool my_use_transliteration)
64 transliteration_string = "";
65 #ifdef HAVE_ICONV
66 if (my_use_transliteration) {
67 transliteration_string = "//TRANSLIT";
70 /* Set the data encoding - first check $FREECIV_DATA_ENCODING,
71 * then fall back to the default. */
72 data_encoding = getenv("FREECIV_DATA_ENCODING");
73 if (!data_encoding) {
74 data_encoding = FC_DEFAULT_DATA_ENCODING;
77 /* Set the local encoding - first check $FREECIV_LOCAL_ENCODING,
78 * then ask the system. */
79 local_encoding = getenv("FREECIV_LOCAL_ENCODING");
80 if (!local_encoding) {
81 #ifdef HAVE_LIBCHARSET
82 local_encoding = locale_charset();
83 #else /* HAVE_LIBCHARSET */
84 #ifdef HAVE_LANGINFO_CODESET
85 local_encoding = nl_langinfo(CODESET);
86 #else /* HAVE_LANGINFO_CODESET */
87 local_encoding = "";
88 #endif /* HAVE_LANGINFO_CODESET */
89 #endif /* HAVE_LIBCHARSET */
90 if (fc_strcasecmp(local_encoding, "ANSI_X3.4-1968") == 0
91 || fc_strcasecmp(local_encoding, "ASCII") == 0
92 || fc_strcasecmp(local_encoding, "US-ASCII") == 0) {
93 /* HACK: use latin1 instead of ascii in typical cases when the
94 * encoding is unconfigured. */
95 local_encoding = "ISO-8859-1";
98 if (fc_strcasecmp(local_encoding, "646") == 0) {
99 /* HACK: On Solaris the encoding always comes up as "646" (ascii),
100 * which iconv doesn't understand. Work around it by using UTF-8
101 * instead. */
102 local_encoding = "UTF-8";
106 /* Set the internal encoding - first check $FREECIV_INTERNAL_ENCODING,
107 * then check the passed-in default value, then fall back to the local
108 * encoding. */
109 internal_encoding = getenv("FREECIV_INTERNAL_ENCODING");
110 if (!internal_encoding) {
111 internal_encoding = my_internal_encoding;
113 if (!internal_encoding) {
114 internal_encoding = local_encoding;
118 #ifdef ENABLE_NLS
119 bind_textdomain_codeset(PACKAGE, internal_encoding);
120 #endif
122 #ifdef DEBUG
123 fprintf(stderr, "Encodings: Data=%s, Local=%s, Internal=%s\n",
124 data_encoding, local_encoding, internal_encoding);
125 #endif /* DEBUG */
127 #else /* HAVE_ICONV */
128 /* log_* may not work at this point. */
129 fprintf(stderr,
130 _("You are running Freeciv without using iconv. Unless\n"
131 "you are using the UTF-8 character set, some characters\n"
132 "may not be displayed properly. You can download iconv\n"
133 "at http://gnu.org/.\n"));
134 #endif /* HAVE_ICONV */
136 is_init = TRUE;
139 /***************************************************************************
140 Return the data encoding (usually UTF-8).
141 ***************************************************************************/
142 const char *get_data_encoding(void)
144 fc_assert_ret_val(is_init, NULL);
145 return data_encoding;
/***************************************************************************
  Return the name of the local encoding (dependent on the system).

  With iconv this is the value chosen by init_character_encodings(), and
  the call is guarded by an is_init assertion (fallback value NULL).
  Without iconv the system is queried directly on every call, or "" is
  returned when no query facility was detected at build time.
***************************************************************************/
const char *get_local_encoding(void)
{
#if defined(HAVE_ICONV)
  fc_assert_ret_val(is_init, NULL);

  return local_encoding;
#elif defined(HAVE_LIBCHARSET)
  return locale_charset();
#elif defined(HAVE_LANGINFO_CODESET)
  return nl_langinfo(CODESET);
#else
  return "";
#endif
}
169 /***************************************************************************
170 Return the internal encoding. This depends on the server or GUI being
171 used.
172 ***************************************************************************/
173 const char *get_internal_encoding(void)
175 fc_assert_ret_val(is_init, NULL);
176 return internal_encoding;
/***************************************************************************
  Convert 'text' from the 'from' charset into the 'to' charset.  Both
  'from' and 'to' must be 8-bit charsets.

  If 'buf' is non-NULL the result is written there (never more than
  'bufsz' bytes) and 'buf' is returned; output that does not fit is
  truncated and NUL-terminated.  If 'buf' is NULL a buffer is allocated on
  demand and returned.

  When the conversion cannot be set up or the input is invalid, a message
  is printed to stderr and an unconverted copy of 'text' is returned
  instead.  The is_init / non-NULL argument assertions use NULL as their
  fallback return value.

  Don't use this function if you can avoid it.  Use one of the
  xxx_to_yyy_string functions.
***************************************************************************/
char *convert_string(const char *text,
                     const char *from,
                     const char *to,
                     char *buf, size_t bufsz)
{
#ifdef HAVE_ICONV
  iconv_t cd;
  size_t from_len, to_len;
  bool alloc = (buf == NULL);

  /* Validate the arguments before using them: strlen(NULL) is undefined,
   * and bailing out after iconv_open() would leak the descriptor. */
  fc_assert_ret_val(is_init && NULL != from && NULL != to, NULL);
  fc_assert_ret_val(NULL != text, NULL);

  from_len = strlen(text) + 1;

  cd = iconv_open(to, from);
  if (cd == (iconv_t) (-1)) {
    /* Do not do potentially recursive call to freeciv logging here,
     * but use fprintf(stderr) */
    /* Use the real OS-provided strerror and errno rather than Freeciv's
     * abstraction, as that wouldn't do the correct thing with third-party
     * iconv on Windows */

    /* TRANS: "Could not convert text from <encoding a> to <encoding b>:"
     *        <externally translated error string>."*/
    fprintf(stderr, _("Could not convert text from %s to %s: %s.\n"),
            from, to, strerror(errno));
    /* The best we can do? */
    if (alloc) {
      return fc_strdup(text);
    } else {
      fc_snprintf(buf, bufsz, "%s", text);
      return buf;
    }
  }

  /* When allocating, start with the input size and grow on demand. */
  to_len = alloc ? from_len : bufsz;

  do {
    size_t flen = from_len, tlen = to_len, res;
    const char *mytext = text;
    char *myresult;

    if (alloc) {
      buf = fc_malloc(to_len);
    }

    myresult = buf;

    /* Since we may do multiple translations, we may need to reset iconv
     * in between. */
    iconv(cd, NULL, NULL, NULL, NULL);

    res = iconv(cd, (ICONV_CONST char **)&mytext, &flen, &myresult, &tlen);
    if (res != (size_t) (-1)) {
      /* Success. */
      iconv_close(cd);

      /* There may be wasted space here, but there's nothing we can do
       * about it. */
      return buf;
    }

    if (errno != E2BIG) {
      /* Invalid input. */
      fprintf(stderr, "Invalid string conversion from %s to %s: %s.\n",
              from, to, strerror(errno));
      iconv_close(cd);
      if (alloc) {
        free(buf);
        return fc_strdup(text); /* The best we can do? */
      } else {
        fc_snprintf(buf, bufsz, "%s", text);
        return buf;
      }
    }

    /* E2BIG: the output buffer was too small. */
    if (alloc) {
      /* Not enough space; try again with a bigger buffer. */
      free(buf);
      to_len *= 2;
    } else if (tlen > 0) {
      /* Caller's buffer is full apart from a tail too short for the next
       * character; terminate right after what was written. */
      *myresult = '\0';
    } else if (bufsz > 0) {
      /* Completely full: sacrifice the last byte for the terminator. */
      buf[bufsz - 1] = '\0';
    }
  } while (alloc);

  /* Only reached with a caller-supplied buffer that was too small. */
  iconv_close(cd);

  return buf;
#else /* HAVE_ICONV */
  if (buf) {
    /* Guard against a zero-sized buffer; buf[bufsz - 1] would underflow. */
    if (bufsz > 0) {
      strncpy(buf, text, bufsz);
      buf[bufsz - 1] = '\0';
    }
    return buf;
  } else {
    return fc_strdup(text);
  }
#endif /* HAVE_ICONV */
}
/* Defines <src>_to_<dst>_string_malloc(text): converts 'text' from the
 * <src> encoding to the <dst> encoding (with transliteration_string
 * appended to the target name) by calling convert_string() with a NULL
 * buffer, and returns whatever convert_string() returns. */
#define CONV_FUNC_MALLOC(src, dst)                                          \
char *src ## _to_ ## dst ## _string_malloc(const char *text)                \
{                                                                           \
  const char *target = (dst ## _encoding);                                  \
  char target_spec[strlen(target) + strlen(transliteration_string) + 1];    \
                                                                            \
  fc_snprintf(target_spec, sizeof(target_spec),                             \
              "%s%s", target, transliteration_string);                      \
  return convert_string(text, (src ## _encoding),                           \
                        (target_spec), NULL, 0);                            \
}
/* Defines <src>_to_<dst>_string_buffer(text, buf, bufsz): converts 'text'
 * from the <src> encoding to the <dst> encoding (with
 * transliteration_string appended to the target name), handing the
 * caller's 'buf'/'bufsz' to convert_string() and returning its result. */
#define CONV_FUNC_BUFFER(src, dst)                                          \
char *src ## _to_ ## dst ## _string_buffer(const char *text,                \
                                           char *buf, size_t bufsz)         \
{                                                                           \
  const char *target = (dst ## _encoding);                                  \
  char target_spec[strlen(target) + strlen(transliteration_string) + 1];    \
                                                                            \
  fc_snprintf(target_spec, sizeof(target_spec),                             \
              "%s%s", target, transliteration_string);                      \
  return convert_string(text, (src ## _encoding),                           \
                        target_spec, buf, bufsz);                           \
}
/* Defines <src>_to_<dst>_string_static(text): runs the matching
 * *_string_buffer() converter into the file-level convert_buffer and
 * returns convert_buffer.  The result is overwritten by the next call. */
#define CONV_FUNC_STATIC(src, dst)                                          \
char *src ## _to_ ## dst ## _string_static(const char *text)                \
{                                                                           \
  (src ## _to_ ## dst ## _string_buffer)(text, convert_buffer,              \
                                         sizeof(convert_buffer));           \
  return convert_buffer;                                                    \
}
321 CONV_FUNC_MALLOC(data, internal)
322 CONV_FUNC_MALLOC(internal, data)
323 CONV_FUNC_MALLOC(internal, local)
324 CONV_FUNC_MALLOC(local, internal)
326 CONV_FUNC_BUFFER(local, internal)
327 CONV_FUNC_BUFFER(internal, local)
329 static CONV_FUNC_STATIC(internal, local)
331 /***************************************************************************
332 Do a fprintf from the internal charset into the local charset.
333 ***************************************************************************/
334 void fc_fprintf(FILE *stream, const char *format, ...)
336 va_list ap;
337 char string[4096];
338 const char *output;
339 static bool recursion = FALSE;
341 /* The recursion variable is used to prevent a recursive loop. If
342 * an iconv conversion fails, then log_* will be called and an
343 * fc_fprintf will be done. But below we do another iconv conversion
344 * on the error messages, which is of course likely to fail also. */
345 if (recursion) {
346 return;
349 va_start(ap, format);
350 fc_vsnprintf(string, sizeof(string), format, ap);
351 va_end(ap);
353 recursion = TRUE;
354 if (is_init) {
355 output = internal_to_local_string_static(string);
356 } else {
357 output = string;
359 recursion = FALSE;
361 fputs(output, stream);
362 fflush(stream);
365 /****************************************************************************
366 Return the length, in *characters*, of the string. This can be used in
367 place of strlen in some places because it returns the number of characters
368 not the number of bytes (with multi-byte characters in UTF-8, the two
369 may not be the same).
371 Use of this function outside of GUI layout code is probably a hack. For
372 instance the demographics code uses it, but this should instead pass the
373 data directly to the GUI library for formatting.
374 ****************************************************************************/
375 size_t get_internal_string_length(const char *text)
377 int text2[(strlen(text) + 1)]; /* UCS-4 text */
378 int i;
379 int len = 0;
381 convert_string(text, internal_encoding, "UCS-4",
382 (char *)text2, sizeof(text2));
383 for (i = 0; ; i++) {
384 if (text2[i] == 0) {
385 return len;
387 if (text2[i] != 0x0000FEFF && text2[i] != 0xFFFE0000) {
388 /* Not BOM */
389 len++;