1 /**********************************************************************
2 Freeciv - Copyright (C) 2003-2004 - The Freeciv Project
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2, or (at your option)
any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12 ***********************************************************************/
15 #include <fc_config.h>
27 #ifdef HAVE_LANGINFO_CODESET
31 #ifdef HAVE_LIBCHARSET
32 #include <libcharset.h>
/* Initialisation flag. The accessors below and convert_string() assert on it
 * before using any encoding. Presumably set once init_character_encodings()
 * has run; the assignment is not visible in this part of the file. */
42 static bool is_init
= FALSE
;
/* Shared scratch buffer returned by the *_string_static() converters
 * generated by CONV_FUNC_STATIC. */
43 static char convert_buffer
[4096];
/* Encoding names chosen by init_character_encodings(). */
46 static const char *local_encoding
, *data_encoding
, *internal_encoding
;
/* Suffix appended to the target encoding name by the CONV_FUNC_* macros:
 * "//TRANSLIT" when transliteration was requested, "" otherwise. */
47 static const char *transliteration_string
;
48 #else /* HAVE_ICONV */
49 /* Hack to confuse the compiler into working. */
/* Without iconv, all three encoding names expand to a call to
 * get_local_encoding(), so code written against the variables above still
 * resolves to something. */
50 # define local_encoding get_local_encoding()
51 # define data_encoding get_local_encoding()
52 # define internal_encoding get_local_encoding()
53 #endif /* HAVE_ICONV */
55 /***************************************************************************
56 Must be called during the initialization phase of server and client to
57 initialize the character encodings to be used.
59 Pass an internal encoding of NULL to use the local encoding internally.

my_internal_encoding: default internal encoding, consulted only when
$FREECIV_INTERNAL_ENCODING is unset; NULL falls back to the local encoding.
my_use_transliteration: when true, "//TRANSLIT" is stored as the suffix that
the conversion helpers append to the target encoding name; otherwise the
suffix is the empty string.

Each encoding can be overridden from the environment
($FREECIV_DATA_ENCODING, $FREECIV_LOCAL_ENCODING,
$FREECIV_INTERNAL_ENCODING). The pointers returned by getenv() are stored
as-is, not copied.
60 ***************************************************************************/
61 void init_character_encodings(const char *my_internal_encoding
,
62 bool my_use_transliteration
)
/* Pick the suffix that the CONV_FUNC_* helpers append to the target
 * encoding name passed to convert_string(). */
65 if (my_use_transliteration
) {
66 transliteration_string
= "//TRANSLIT";
68 transliteration_string
= "";
71 /* Set the data encoding - first check $FREECIV_DATA_ENCODING,
72 * then fall back to the default. */
73 data_encoding
= getenv("FREECIV_DATA_ENCODING");
75 data_encoding
= FC_DEFAULT_DATA_ENCODING
;
78 /* Set the local encoding - first check $FREECIV_LOCAL_ENCODING,
79 * then ask the system. */
80 local_encoding
= getenv("FREECIV_LOCAL_ENCODING");
81 if (!local_encoding
) {
/* Prefer libcharset's locale_charset(); otherwise use
 * nl_langinfo(CODESET) when the platform provides it. */
82 #ifdef HAVE_LIBCHARSET
83 local_encoding
= locale_charset();
84 #else /* HAVE_LIBCHARSET */
85 #ifdef HAVE_LANGINFO_CODESET
86 local_encoding
= nl_langinfo(CODESET
);
87 #else /* HAVE_LANGINFO_CODESET */
89 #endif /* HAVE_LANGINFO_CODESET */
90 #endif /* HAVE_LIBCHARSET */
/* The three names compared below are all treated as plain ASCII; the
 * comparison is case-insensitive. */
91 if (fc_strcasecmp(local_encoding
, "ANSI_X3.4-1968") == 0
92 || fc_strcasecmp(local_encoding
, "ASCII") == 0
93 || fc_strcasecmp(local_encoding
, "US-ASCII") == 0) {
94 /* HACK: use latin1 instead of ascii in typical cases when the
95 * encoding is unconfigured. */
96 local_encoding
= "ISO-8859-1";
99 if (fc_strcasecmp(local_encoding
, "646") == 0) {
100 /* HACK: On Solaris the encoding always comes up as "646" (ascii),
101 * which iconv doesn't understand. Work around it by using UTF-8
103 local_encoding
= "UTF-8";
107 /* Set the internal encoding - first check $FREECIV_INTERNAL_ENCODING,
108 * then check the passed-in default value, then fall back to the local
110 internal_encoding
= getenv("FREECIV_INTERNAL_ENCODING");
111 if (!internal_encoding
) {
112 internal_encoding
= my_internal_encoding
;
114 if (!internal_encoding
) {
115 internal_encoding
= local_encoding
;
120 bind_textdomain_codeset(PACKAGE
, internal_encoding
);
124 fprintf(stderr
, "Encodings: Data=%s, Local=%s, Internal=%s\n",
125 data_encoding
, local_encoding
, internal_encoding
);
128 #else /* HAVE_ICONV */
129 /* log_* may not work at this point. */
131 _("You are running Freeciv without using iconv. Unless\n"
132 "you are using the UTF-8 character set, some characters\n"
133 "may not be displayed properly. You can download iconv\n"
134 "at http://gnu.org/.\n"));
135 #endif /* HAVE_ICONV */
140 /***************************************************************************
141 Return the data encoding (usually UTF-8).

Asserts that is_init is set. NULL is passed to fc_assert_ret_val() as the
fallback value, so a call made before initialization presumably yields
NULL (this depends on that macro's definition, which is not shown here).
The returned pointer is the stored encoding name, not a copy.
142 ***************************************************************************/
143 const char *get_data_encoding(void)
145 fc_assert_ret_val(is_init
, NULL
);
146 return data_encoding
;
149 /***************************************************************************
150 Return the local encoding (dependent on the system).

One branch asserts is_init and returns the stored local_encoding. The
"else HAVE_ICONV" branch does not use the stored value: it asks the system
on every call, through locale_charset() when libcharset is available or
nl_langinfo(CODESET) otherwise. The result used when neither is available
is not visible in this part of the file.
151 ***************************************************************************/
152 const char *get_local_encoding(void)
155 fc_assert_ret_val(is_init
, NULL
);
156 return local_encoding
;
/* No iconv: query the platform directly instead of using a cached name. */
157 #else /* HAVE_ICONV */
158 # ifdef HAVE_LIBCHARSET
159 return locale_charset();
160 # else /* HAVE_LIBCHARSET */
161 # ifdef HAVE_LANGINFO_CODESET
162 return nl_langinfo(CODESET
);
163 # else /* HAVE_LANGINFO_CODESET */
165 # endif /* HAVE_LANGINFO_CODESET */
166 # endif /* HAVE_LIBCHARSET */
167 #endif /* HAVE_ICONV */
170 /***************************************************************************
171 Return the internal encoding. This depends on the server or GUI being
used.

Asserts that is_init is set, with NULL passed to fc_assert_ret_val() as the
fallback value. The returned pointer is the stored encoding name, not a
copy.
173 ***************************************************************************/
174 const char *get_internal_encoding(void)
176 fc_assert_ret_val(is_init
, NULL
);
177 return internal_encoding
;
180 /***************************************************************************
181 Convert the text. Both 'from' and 'to' must be 8-bit charsets. The
182 result will be put into the buf buffer unless it is NULL, in which case it
183 will be allocated on demand.
185 Don't use this function if you can avoid it. Use one of the
186 xxx_to_yyy_string functions.

text: NUL-terminated input string (its length is taken with strlen()).
from, to: encoding names, handed to iconv_open(to, from).
buf, bufsz: destination buffer and its size in bytes; pass buf == NULL to
have the result allocated instead (fc_malloc()/fc_strdup() are used, so the
caller presumably owns the returned memory).

If the conversion descriptor cannot be opened, or the input cannot be
converted, a message is written to stderr and the unconverted text is
returned instead (duplicated, or copied into buf).
187 ***************************************************************************/
188 char *convert_string(const char *text
,
191 char *buf
, size_t bufsz
)
194 iconv_t cd
= iconv_open(to
, from
);
195 size_t from_len
= strlen(text
) + 1, to_len
;
196 bool alloc
= (buf
== NULL
);
/* NOTE(review): iconv_open() and strlen(text) above have already run by
 * the time the assertions below check 'from', 'to' and 'text' for NULL. */
198 fc_assert_ret_val(is_init
&& NULL
!= from
&& NULL
!= to
, NULL
);
199 fc_assert_ret_val(NULL
!= text
, NULL
);
/* iconv_open() reports failure by returning (iconv_t) -1. */
201 if (cd
== (iconv_t
) (-1)) {
202 /* Do not do potentially recursive call to freeciv logging here,
203 * but use fprintf(stderr) */
204 /* Use the real OS-provided strerror and errno rather than Freeciv's
205 * abstraction, as that wouldn't do the correct thing with third-party
206 * iconv on Windows */
208 /* TRANS: "Could not convert text from <encoding a> to <encoding b>:"
209 * <externally translated error string>."*/
210 fprintf(stderr
, _("Could not convert text from %s to %s: %s.\n"),
211 from
, to
, strerror(errno
));
212 /* The best we can do? */
214 return fc_strdup(text
);
216 fc_snprintf(buf
, bufsz
, "%s", text
);
/* Work on copies of the lengths and of the input pointer: iconv() is given
 * their addresses further down. */
228 size_t flen
= from_len
, tlen
= to_len
, res
;
229 const char *mytext
= text
;
233 buf
= fc_malloc(to_len
);
238 /* Since we may do multiple translations, we may need to reset iconv
240 iconv(cd
, NULL
, NULL
, NULL
, NULL
);
242 res
= iconv(cd
, (ICONV_CONST
char **)&mytext
, &flen
, &myresult
, &tlen
);
243 if (res
== (size_t) (-1)) {
244 if (errno
!= E2BIG
) {
247 fprintf(stderr
, "Invalid string conversion from %s to %s: %s.\n",
248 from
, to
, strerror(errno
));
252 return fc_strdup(text
); /* The best we can do? */
254 fc_snprintf(buf
, bufsz
, "%s", text
);
262 /* There may be wasted space here, but there's nothing we can do
268 /* Not enough space; try again. */
277 #else /* HAVE_ICONV */
/* No iconv: the text is passed through unconverted. strncpy() does not
 * NUL-terminate when it truncates, hence the explicit terminator. */
279 strncpy(buf
, text
, bufsz
);
280 buf
[bufsz
- 1] = '\0';
283 return fc_strdup(text
);
285 #endif /* HAVE_ICONV */
/* Generates <src>_to_<dst>_string_malloc(text). The target encoding name is
 * built in a variable-length array as <dst>_encoding followed by
 * transliteration_string, then convert_string() is called with a NULL
 * buffer so that the result is allocated. */
288 #define CONV_FUNC_MALLOC(src, dst) \
289 char *src ## _to_ ## dst ## _string_malloc(const char *text) \
291 const char *encoding1 = (dst ## _encoding); \
292 char encoding[strlen(encoding1) + strlen(transliteration_string) + 1]; \
294 fc_snprintf(encoding, sizeof(encoding), \
295 "%s%s", encoding1, transliteration_string); \
296 return convert_string(text, (src ## _encoding), \
297 (encoding), NULL, 0); \
/* Generates <src>_to_<dst>_string_buffer(text, buf, bufsz). The target
 * encoding name is built in a variable-length array as <dst>_encoding
 * followed by transliteration_string, then convert_string() is called with
 * the caller's buf and bufsz and its result is returned. */
300 #define CONV_FUNC_BUFFER(src, dst) \
301 char *src ## _to_ ## dst ## _string_buffer(const char *text, \
302 char *buf, size_t bufsz) \
304 const char *encoding1 = (dst ## _encoding); \
305 char encoding[strlen(encoding1) + strlen(transliteration_string) + 1]; \
307 fc_snprintf(encoding, sizeof(encoding), \
308 "%s%s", encoding1, transliteration_string); \
309 return convert_string(text, (src ## _encoding), \
310 encoding, buf, bufsz); \
/* Generates <src>_to_<dst>_string_static(text). It calls the matching
 * <src>_to_<dst>_string_buffer() function, which must therefore exist, with
 * sizeof(convert_buffer) as the size, and returns the file-level
 * convert_buffer. The returned pointer refers to that shared static
 * storage. */
313 #define CONV_FUNC_STATIC(src, dst) \
314 char *src ## _to_ ## dst ## _string_static(const char *text) \
316 (src ## _to_ ## dst ## _string_buffer)(text, \
318 sizeof(convert_buffer)); \
319 return convert_buffer; \
/* Allocating converters: data_to_internal_string_malloc(),
 * internal_to_data_string_malloc(), internal_to_local_string_malloc() and
 * local_to_internal_string_malloc(). */
322 CONV_FUNC_MALLOC(data
, internal
)
323 CONV_FUNC_MALLOC(internal
, data
)
324 CONV_FUNC_MALLOC(internal
, local
)
325 CONV_FUNC_MALLOC(local
, internal
)
/* Caller-supplied-buffer converters: local_to_internal_string_buffer() and
 * internal_to_local_string_buffer(). */
327 CONV_FUNC_BUFFER(local
, internal
)
328 CONV_FUNC_BUFFER(internal
, local
)
/* File-local internal_to_local_string_static(), used by fc_fprintf(). It
 * relies on internal_to_local_string_buffer() instantiated just above. */
330 static CONV_FUNC_STATIC(internal
, local
)
332 /***************************************************************************
333 Do a fprintf from the internal charset into the local charset.

stream: destination stream, written with fputs().
format, ...: printf-style format and arguments; the formatted text is taken
to be in the internal encoding.

The text is first formatted into a local buffer, then converted with
internal_to_local_string_static(), whose result lives in the shared static
convert_buffer.
334 ***************************************************************************/
335 void fc_fprintf(FILE *stream
, const char *format
, ...)
340 static bool recursion
= FALSE
;
342 /* The recursion variable is used to prevent a recursive loop. If
343 * an iconv conversion fails, then log_* will be called and an
344 * fc_fprintf will be done. But below we do another iconv conversion
345 * on the error messages, which is of course likely to fail also. */
/* Expand the format into 'string', bounded by sizeof(string). */
350 va_start(ap
, format
);
351 fc_vsnprintf(string
, sizeof(string
), format
, ap
);
/* Convert the formatted text from the internal to the local encoding. */
356 output
= internal_to_local_string_static(string
);
362 fputs(output
, stream
);
366 /****************************************************************************
367 Return the length, in *characters*, of the string. This can be used in
368 place of strlen in some places because it returns the number of characters
369 not the number of bytes (with multi-byte characters in UTF-8, the two
370 may not be the same).
372 Use of this function outside of GUI layout code is probably a hack. For
373 instance the demographics code uses it, but this should instead pass the
374 data directly to the GUI library for formatting.
375 ****************************************************************************/
376 size_t get_internal_string_length(const char *text
)
378 int text2
[(strlen(text
) + 1)]; /* UCS-4 text */
382 convert_string(text
, internal_encoding
, "UCS-4",
383 (char *)text2
, sizeof(text2
));
388 if (text2
[i
] != 0x0000FEFF && text2
[i
] != 0xFFFE0000) {