extfs: uarc: add tests.
[midnight-commander.git] / lib / charsets.c
blobab6627f3401648cc5168eecf024e081772024bf2
1 /*
2 Text conversion from one charset to another.
4 Copyright (C) 2001-2016
5 Free Software Foundation, Inc.
7 Written by:
8 Walery Studennikov <despair@sama.ru>
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 /** \file charsets.c
27 * \brief Source: Text conversion from one charset to another
30 #include <config.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
36 #include "lib/global.h"
37 #include "lib/strutil.h" /* utf-8 functions */
38 #include "lib/fileloc.h"
39 #include "lib/charsets.h"
41 /*** global variables ****************************************************************************/
/* List of codepage_desc pointers. Filled by load_codepages_list() and released
 * (then reset to NULL) by free_codepages_list(). */
43 GPtrArray *codepages = NULL;
/* Per-byte lookup tables rebuilt by init_translation_table():
 * conv_displ is applied by convert_to_display(),
 * conv_input is applied by convert_from_input(). */
45 unsigned char conv_displ[256];
46 unsigned char conv_input[256];
/* Ids of the display and source codepages selected by the last
 * init_translation_table() call. They are compared by pointer, not by content,
 * in str_nconvert_to_display() and str_nconvert_to_input(). */
48 const char *cp_display = NULL;
49 const char *cp_source = NULL;
51 /*** file scope macro definitions ****************************************************************/
/* Value returned by translate_character() when a byte cannot be converted. */
53 #define UNKNCHAR '\001'
/* Codepage id reported by get_codepage_id() for a negative index;
 * get_codepage_index() maps this id back to -1. */
55 #define OTHER_8BIT "Other_8_bit"
57 /*** file scope type declarations ****************************************************************/
59 /*** file scope variables ************************************************************************/
61 /*** file scope functions ************************************************************************/
62 /* --------------------------------------------------------------------------------------------- */
64 static codepage_desc *
65 new_codepage_desc (const char *id, const char *name)
67 codepage_desc *desc;
69 desc = g_new (codepage_desc, 1);
70 desc->id = g_strdup (id);
71 desc->name = g_strdup (name);
73 return desc;
76 /* --------------------------------------------------------------------------------------------- */
78 static void
79 free_codepage_desc (gpointer data, gpointer user_data)
81 codepage_desc *desc = (codepage_desc *) data;
82 (void) user_data;
84 g_free (desc->id);
85 g_free (desc->name);
86 g_free (desc);
89 /* --------------------------------------------------------------------------------------------- */
90 /* returns display codepage */
92 static void
93 load_codepages_list_from_file (GPtrArray ** list, const char *fname)
95 FILE *f;
96 char buf[BUF_MEDIUM];
97 char *default_codepage = NULL;
99 f = fopen (fname, "r");
100 if (f == NULL)
101 return;
103 while (fgets (buf, sizeof buf, f) != NULL)
105 /* split string into id and cpname */
106 char *p = buf;
107 size_t buflen = strlen (buf);
109 if (*p == '\n' || *p == '\0' || *p == '#')
110 continue;
112 if (buflen > 0 && buf[buflen - 1] == '\n')
113 buf[buflen - 1] = '\0';
114 while (*p != '\t' && *p != ' ' && *p != '\0')
115 ++p;
116 if (*p == '\0')
117 goto fail;
119 *p++ = '\0';
120 g_strstrip (p);
121 if (*p == '\0')
122 goto fail;
124 if (strcmp (buf, "default") == 0)
125 default_codepage = g_strdup (p);
126 else
128 const char *id = buf;
130 if (*list == NULL)
132 *list = g_ptr_array_sized_new (16);
133 g_ptr_array_add (*list, new_codepage_desc (id, p));
135 else
137 unsigned int i;
139 /* whether id is already present in list */
140 /* if yes, overwrite description */
141 for (i = 0; i < (*list)->len; i++)
143 codepage_desc *desc;
145 desc = (codepage_desc *) g_ptr_array_index (*list, i);
147 if (strcmp (id, desc->id) == 0)
149 /* found */
150 g_free (desc->name);
151 desc->name = g_strdup (p);
152 break;
156 /* not found */
157 if (i == (*list)->len)
158 g_ptr_array_add (*list, new_codepage_desc (id, p));
163 if (default_codepage != NULL)
165 mc_global.display_codepage = get_codepage_index (default_codepage);
166 g_free (default_codepage);
169 fail:
170 fclose (f);
173 /* --------------------------------------------------------------------------------------------- */
175 static char
176 translate_character (GIConv cd, char c)
178 gchar *tmp_buff = NULL;
179 gsize bytes_read, bytes_written = 0;
180 const char *ibuf = &c;
181 char ch = UNKNCHAR;
183 int ibuflen = 1;
185 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
186 if (tmp_buff)
187 ch = tmp_buff[0];
188 g_free (tmp_buff);
189 return ch;
192 /* --------------------------------------------------------------------------------------------- */
193 /*** public functions ****************************************************************************/
194 /* --------------------------------------------------------------------------------------------- */
196 void
197 load_codepages_list (void)
199 char *fname;
201 /* 1: try load /usr/share/mc/mc.charsets */
202 fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
203 load_codepages_list_from_file (&codepages, fname);
204 g_free (fname);
206 /* 2: try load /etc/mc/mc.charsets */
207 fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
208 load_codepages_list_from_file (&codepages, fname);
209 g_free (fname);
211 if (codepages == NULL)
213 /* files are not found, add defaullt codepage */
214 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
216 codepages = g_ptr_array_new ();
217 g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _("7-bit ASCII")));
221 /* --------------------------------------------------------------------------------------------- */
223 void
224 free_codepages_list (void)
226 g_ptr_array_foreach (codepages, free_codepage_desc, NULL);
227 g_ptr_array_free (codepages, TRUE);
228 /* NULL-ize pointer to make unit tests happy */
229 codepages = NULL;
232 /* --------------------------------------------------------------------------------------------- */
234 const char *
235 get_codepage_id (const int n)
237 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
240 /* --------------------------------------------------------------------------------------------- */
243 get_codepage_index (const char *id)
245 size_t i;
246 if (strcmp (id, OTHER_8BIT) == 0)
247 return -1;
248 if (codepages == NULL)
249 return -1;
250 for (i = 0; i < codepages->len; i++)
251 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
252 return i;
253 return -1;
256 /* --------------------------------------------------------------------------------------------- */
257 /** Check if specified encoding can be used in mc.
258 * @param encoding name of encoding
259 * @return TRUE if encoding is supported by mc, FALSE otherwise
262 gboolean
263 is_supported_encoding (const char *encoding)
265 gboolean result = FALSE;
266 guint t;
268 for (t = 0; t < codepages->len; t++)
270 const char *id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
271 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
274 return result;
277 /* --------------------------------------------------------------------------------------------- */
279 char *
280 init_translation_table (int cpsource, int cpdisplay)
282 int i;
283 GIConv cd;
285 /* Fill inpit <-> display tables */
287 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
289 for (i = 0; i <= 255; ++i)
291 conv_displ[i] = i;
292 conv_input[i] = i;
293 cp_source = cp_display;
295 return NULL;
298 for (i = 0; i <= 127; ++i)
300 conv_displ[i] = i;
301 conv_input[i] = i;
303 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
304 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
306 /* display <- inpit table */
308 cd = g_iconv_open (cp_display, cp_source);
309 if (cd == INVALID_CONV)
310 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
312 for (i = 128; i <= 255; ++i)
313 conv_displ[i] = translate_character (cd, i);
315 g_iconv_close (cd);
317 /* inpit <- display table */
319 cd = g_iconv_open (cp_source, cp_display);
320 if (cd == INVALID_CONV)
321 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
323 for (i = 128; i <= 255; ++i)
325 unsigned char ch;
326 ch = translate_character (cd, i);
327 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
330 g_iconv_close (cd);
332 return NULL;
335 /* --------------------------------------------------------------------------------------------- */
337 void
338 convert_to_display (char *str)
340 if (!str)
341 return;
343 while (*str)
345 *str = conv_displ[(unsigned char) *str];
346 str++;
350 /* --------------------------------------------------------------------------------------------- */
352 GString *
353 str_convert_to_display (const char *str)
355 return str_nconvert_to_display (str, -1);
359 /* --------------------------------------------------------------------------------------------- */
361 GString *
362 str_nconvert_to_display (const char *str, int len)
364 GString *buff;
365 GIConv conv;
367 if (!str)
368 return g_string_new ("");
370 if (cp_display == cp_source)
371 return g_string_new (str);
373 conv = str_crt_conv_from (cp_source);
375 buff = g_string_new ("");
376 str_nconvert (conv, str, len, buff);
377 str_close_conv (conv);
378 return buff;
381 /* --------------------------------------------------------------------------------------------- */
383 void
384 convert_from_input (char *str)
386 if (!str)
387 return;
389 while (*str)
391 *str = conv_input[(unsigned char) *str];
392 str++;
396 /* --------------------------------------------------------------------------------------------- */
398 GString *
399 str_convert_to_input (const char *str)
401 return str_nconvert_to_input (str, -1);
404 /* --------------------------------------------------------------------------------------------- */
406 GString *
407 str_nconvert_to_input (const char *str, int len)
409 GString *buff;
410 GIConv conv;
412 if (!str)
413 return g_string_new ("");
415 if (cp_display == cp_source)
416 return g_string_new (str);
418 conv = str_crt_conv_to (cp_source);
420 buff = g_string_new ("");
421 str_nconvert (conv, str, len, buff);
422 str_close_conv (conv);
423 return buff;
426 /* --------------------------------------------------------------------------------------------- */
428 unsigned char
429 convert_from_utf_to_current (const char *str)
431 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
432 unsigned char ch = '.';
433 GIConv conv;
434 const char *cp_to;
436 if (str == NULL)
437 return '.';
439 cp_to = get_codepage_id (mc_global.source_codepage);
440 conv = str_crt_conv_to (cp_to);
442 if (conv != INVALID_CONV)
444 switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
446 case ESTR_SUCCESS:
447 ch = buf_ch[0];
448 break;
449 case ESTR_PROBLEM:
450 case ESTR_FAILURE:
451 ch = '.';
452 break;
453 default:
454 break;
456 str_close_conv (conv);
459 return ch;
462 /* --------------------------------------------------------------------------------------------- */
464 unsigned char
465 convert_from_utf_to_current_c (int input_char, GIConv conv)
467 unsigned char str[UTF8_CHAR_LEN + 1];
468 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
469 unsigned char ch = '.';
470 int res;
472 res = g_unichar_to_utf8 (input_char, (char *) str);
473 if (res == 0)
474 return ch;
476 str[res] = '\0';
478 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
480 case ESTR_SUCCESS:
481 ch = buf_ch[0];
482 break;
483 case ESTR_PROBLEM:
484 case ESTR_FAILURE:
485 ch = '.';
486 break;
487 default:
488 break;
491 return ch;
494 /* --------------------------------------------------------------------------------------------- */
497 convert_from_8bit_to_utf_c (char input_char, GIConv conv)
499 unsigned char str[2];
500 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
501 int ch;
503 str[0] = (unsigned char) input_char;
504 str[1] = '\0';
506 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
508 case ESTR_SUCCESS:
510 int res;
512 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
513 ch = res >= 0 ? res : buf_ch[0];
514 break;
516 case ESTR_PROBLEM:
517 case ESTR_FAILURE:
518 default:
519 ch = '.';
520 break;
523 return ch;
526 /* --------------------------------------------------------------------------------------------- */
529 convert_from_8bit_to_utf_c2 (char input_char)
531 unsigned char str[2];
532 int ch = '.';
533 GIConv conv;
534 const char *cp_from;
536 str[0] = (unsigned char) input_char;
537 str[1] = '\0';
539 cp_from = get_codepage_id (mc_global.source_codepage);
540 conv = str_crt_conv_to (cp_from);
542 if (conv != INVALID_CONV)
544 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
546 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
548 case ESTR_SUCCESS:
550 int res;
552 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
553 ch = res >= 0 ? res : buf_ch[0];
554 break;
556 case ESTR_PROBLEM:
557 case ESTR_FAILURE:
558 default:
559 ch = '.';
560 break;
562 str_close_conv (conv);
565 return ch;
568 /* --------------------------------------------------------------------------------------------- */