Added macros for Layout and Misc sections of main config file.
[midnight-commander.git] / lib / charsets.c
blob2ad5bf7d4baa6b6ae25accc2a8feb9fca106aba5
1 /*
2 Text conversion from one charset to another.
4 Copyright (C) 2001, 2011
5 The Free Software Foundation, Inc.
7 Written by:
8 Walery Studennikov <despair@sama.ru>
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 /** \file charsets.c
27 * \brief Source: Text conversion from one charset to another
30 #include <config.h>
32 #ifdef HAVE_CHARSET
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
38 #include "lib/global.h"
39 #include "lib/strutil.h" /* utf-8 functions */
40 #include "lib/fileloc.h"
41 #include "lib/charsets.h"
43 /*** global variables ****************************************************************************/
/* List of known codepages; its elements are codepage_desc pointers.
 * Populated by load_codepages_list() and released by free_codepages_list(). */
45 GPtrArray *codepages = NULL;
/* Byte-to-byte translation tables filled by init_translation_table():
 * conv_displ is applied by convert_to_display(),
 * conv_input is applied by convert_from_input(). */
47 unsigned char conv_displ[256];
48 unsigned char conv_input[256];
/* Ids of the display and source codepages as last set by init_translation_table().
 * The str_nconvert_to_*() functions compare these pointers to decide
 * whether any conversion is needed. */
50 const char *cp_display = NULL;
51 const char *cp_source = NULL;
53 /*** file scope macro definitions ****************************************************************/
/* Codepage id used for a negative codepage index: get_codepage_id() returns it
 * for n < 0 and get_codepage_index() maps it back to -1. */
55 #define OTHER_8BIT "Other_8_bit"
58 * FIXME: This assumes that ASCII is always the first encoding
59 * in mc.charsets
/* Index of the ASCII entry in the codepages list (see the FIXME above). */
61 #define CP_ASCII 0
63 /*** file scope type declarations ****************************************************************/
65 /*** file scope variables ************************************************************************/
67 /*** file scope functions ************************************************************************/
68 /* --------------------------------------------------------------------------------------------- */
70 static codepage_desc *
71 new_codepage_desc (const char *id, const char *name)
73 codepage_desc *desc;
75 desc = g_new (codepage_desc, 1);
76 desc->id = g_strdup (id);
77 desc->name = g_strdup (name);
79 return desc;
82 /* --------------------------------------------------------------------------------------------- */
84 static void
85 free_codepage_desc (gpointer data, gpointer user_data)
87 codepage_desc *desc = (codepage_desc *) data;
88 (void) user_data;
90 g_free (desc->id);
91 g_free (desc->name);
92 g_free (desc);
95 /* --------------------------------------------------------------------------------------------- */
96 /* returns display codepage */
98 static void
99 load_codepages_list_from_file (GPtrArray ** list, const char *fname)
101 FILE *f;
102 guint i;
103 char buf[BUF_MEDIUM];
104 char *default_codepage = NULL;
106 f = fopen (fname, "r");
107 if (f == NULL)
108 return;
110 for (i = 0; fgets (buf, sizeof buf, f) != NULL;)
112 /* split string into id and cpname */
113 char *p = buf;
114 size_t buflen = strlen (buf);
116 if (*p == '\n' || *p == '\0' || *p == '#')
117 continue;
119 if (buflen > 0 && buf[buflen - 1] == '\n')
120 buf[buflen - 1] = '\0';
121 while (*p != '\t' && *p != ' ' && *p != '\0')
122 ++p;
123 if (*p == '\0')
124 goto fail;
126 *p++ = '\0';
127 g_strstrip (p);
128 if (*p == '\0')
129 goto fail;
131 if (strcmp (buf, "default") == 0)
132 default_codepage = g_strdup (p);
133 else
135 const char *id = buf;
137 if (*list == NULL)
139 *list = g_ptr_array_sized_new (16);
140 g_ptr_array_add (*list, new_codepage_desc (id, p));
142 else
144 /* whether id is already present in list */
145 /* if yes, overwrite description */
146 for (i = 0; i < (*list)->len; i++)
148 codepage_desc *desc;
150 desc = (codepage_desc *) g_ptr_array_index (*list, i);
152 if (strcmp (id, desc->id) == 0)
154 /* found */
155 g_free (desc->name);
156 desc->name = g_strdup (p);
157 break;
161 /* not found */
162 if (i == (*list)->len)
163 g_ptr_array_add (*list, new_codepage_desc (id, p));
168 if (default_codepage != NULL)
170 mc_global.display_codepage = get_codepage_index (default_codepage);
171 g_free (default_codepage);
174 fail:
175 fclose (f);
178 /* --------------------------------------------------------------------------------------------- */
180 static char
181 translate_character (GIConv cd, char c)
183 gchar *tmp_buff = NULL;
184 gsize bytes_read, bytes_written = 0;
185 const char *ibuf = &c;
186 char ch = UNKNCHAR;
188 int ibuflen = 1;
190 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
191 if (tmp_buff)
192 ch = tmp_buff[0];
193 g_free (tmp_buff);
194 return ch;
197 /* --------------------------------------------------------------------------------------------- */
198 /*** public functions ****************************************************************************/
199 /* --------------------------------------------------------------------------------------------- */
201 void
202 load_codepages_list (void)
204 char *fname;
206 /* 1: try load /usr/share/mc/mc.charsets */
207 fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
208 load_codepages_list_from_file (&codepages, fname);
209 g_free (fname);
211 /* 2: try load /etc/mc/mc.charsets */
212 fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
213 load_codepages_list_from_file (&codepages, fname);
214 g_free (fname);
216 if (codepages == NULL)
218 /* files are not found, add defaullt codepage */
219 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
221 codepages = g_ptr_array_new ();
222 g_ptr_array_add (codepages, new_codepage_desc ("ASCII", _("7-bit ASCII")));
226 /* --------------------------------------------------------------------------------------------- */
228 void
229 free_codepages_list (void)
231 g_ptr_array_foreach (codepages, free_codepage_desc, NULL);
232 g_ptr_array_free (codepages, TRUE);
235 /* --------------------------------------------------------------------------------------------- */
237 const char *
238 get_codepage_id (const int n)
240 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
243 /* --------------------------------------------------------------------------------------------- */
246 get_codepage_index (const char *id)
248 size_t i;
249 if (strcmp (id, OTHER_8BIT) == 0)
250 return -1;
251 if (codepages == NULL)
252 return -1;
253 for (i = 0; i < codepages->len; i++)
254 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
255 return i;
256 return -1;
259 /* --------------------------------------------------------------------------------------------- */
260 /** Check if specified encoding can be used in mc.
261 * @param encoding name of encoding
262 * @returns TRUE if encoding has supported by mc, FALSE otherwise
265 gboolean
266 is_supported_encoding (const char *encoding)
268 gboolean result = FALSE;
269 guint t;
271 for (t = 0; t < codepages->len; t++)
273 const char *id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
274 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
277 return result;
280 /* --------------------------------------------------------------------------------------------- */
282 char *
283 init_translation_table (int cpsource, int cpdisplay)
285 int i;
286 GIConv cd;
288 /* Fill inpit <-> display tables */
290 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
292 for (i = 0; i <= 255; ++i)
294 conv_displ[i] = i;
295 conv_input[i] = i;
296 cp_source = cp_display;
298 return NULL;
301 for (i = 0; i <= 127; ++i)
303 conv_displ[i] = i;
304 conv_input[i] = i;
306 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
307 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
309 /* display <- inpit table */
311 cd = g_iconv_open (cp_display, cp_source);
312 if (cd == INVALID_CONV)
313 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
315 for (i = 128; i <= 255; ++i)
316 conv_displ[i] = translate_character (cd, i);
318 g_iconv_close (cd);
320 /* inpit <- display table */
322 cd = g_iconv_open (cp_source, cp_display);
323 if (cd == INVALID_CONV)
324 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
326 for (i = 128; i <= 255; ++i)
328 unsigned char ch;
329 ch = translate_character (cd, i);
330 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
333 g_iconv_close (cd);
335 return NULL;
338 /* --------------------------------------------------------------------------------------------- */
340 void
341 convert_to_display (char *str)
343 if (!str)
344 return;
346 while (*str)
348 *str = conv_displ[(unsigned char) *str];
349 str++;
353 /* --------------------------------------------------------------------------------------------- */
355 GString *
356 str_convert_to_display (char *str)
358 return str_nconvert_to_display (str, -1);
362 /* --------------------------------------------------------------------------------------------- */
364 GString *
365 str_nconvert_to_display (char *str, int len)
367 GString *buff;
368 GIConv conv;
370 if (!str)
371 return g_string_new ("");
373 if (cp_display == cp_source)
374 return g_string_new (str);
376 conv = str_crt_conv_from (cp_source);
378 buff = g_string_new ("");
379 str_nconvert (conv, str, len, buff);
380 str_close_conv (conv);
381 return buff;
384 /* --------------------------------------------------------------------------------------------- */
386 void
387 convert_from_input (char *str)
389 if (!str)
390 return;
392 while (*str)
394 *str = conv_input[(unsigned char) *str];
395 str++;
399 /* --------------------------------------------------------------------------------------------- */
401 GString *
402 str_convert_to_input (char *str)
404 return str_nconvert_to_input (str, -1);
407 /* --------------------------------------------------------------------------------------------- */
409 GString *
410 str_nconvert_to_input (char *str, int len)
412 GString *buff;
413 GIConv conv;
415 if (!str)
416 return g_string_new ("");
418 if (cp_display == cp_source)
419 return g_string_new (str);
421 conv = str_crt_conv_to (cp_source);
423 buff = g_string_new ("");
424 str_nconvert (conv, str, len, buff);
425 str_close_conv (conv);
426 return buff;
429 /* --------------------------------------------------------------------------------------------- */
431 unsigned char
432 convert_from_utf_to_current (const char *str)
434 unsigned char buf_ch[6 + 1];
435 unsigned char ch = '.';
436 GIConv conv;
437 const char *cp_to;
439 if (!str)
440 return '.';
442 cp_to = get_codepage_id (mc_global.source_codepage);
443 conv = str_crt_conv_to (cp_to);
445 if (conv != INVALID_CONV)
447 switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
449 case ESTR_SUCCESS:
450 ch = buf_ch[0];
451 break;
452 case ESTR_PROBLEM:
453 case ESTR_FAILURE:
454 ch = '.';
455 break;
457 str_close_conv (conv);
460 return ch;
464 /* --------------------------------------------------------------------------------------------- */
466 unsigned char
467 convert_from_utf_to_current_c (const int input_char, GIConv conv)
469 unsigned char str[6 + 1];
470 unsigned char buf_ch[6 + 1];
471 unsigned char ch = '.';
473 int res = 0;
475 res = g_unichar_to_utf8 (input_char, (char *) str);
476 if (res == 0)
478 return ch;
480 str[res] = '\0';
482 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
484 case ESTR_SUCCESS:
485 ch = buf_ch[0];
486 break;
487 case ESTR_PROBLEM:
488 case ESTR_FAILURE:
489 ch = '.';
490 break;
492 return ch;
495 /* --------------------------------------------------------------------------------------------- */
498 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
500 unsigned char str[2];
501 unsigned char buf_ch[6 + 1];
502 int ch = '.';
503 int res = 0;
505 str[0] = (unsigned char) input_char;
506 str[1] = '\0';
508 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
510 case ESTR_SUCCESS:
511 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
512 if (res < 0)
514 ch = buf_ch[0];
516 else
518 ch = res;
520 break;
521 case ESTR_PROBLEM:
522 case ESTR_FAILURE:
523 ch = '.';
524 break;
526 return ch;
529 /* --------------------------------------------------------------------------------------------- */
532 convert_from_8bit_to_utf_c2 (const char input_char)
534 unsigned char str[2];
535 unsigned char buf_ch[6 + 1];
536 int ch = '.';
537 int res = 0;
538 GIConv conv;
539 const char *cp_from;
541 str[0] = (unsigned char) input_char;
542 str[1] = '\0';
544 cp_from = get_codepage_id (mc_global.source_codepage);
545 conv = str_crt_conv_to (cp_from);
547 if (conv != INVALID_CONV)
549 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
551 case ESTR_SUCCESS:
552 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
553 if (res < 0)
555 ch = buf_ch[0];
557 else
559 ch = res;
561 break;
562 case ESTR_PROBLEM:
563 case ESTR_FAILURE:
564 ch = '.';
565 break;
567 str_close_conv (conv);
569 return ch;
573 /* --------------------------------------------------------------------------------------------- */
575 #endif /* HAVE_CHARSET */