Extended shortcuts like 'ctrl-x x' are unavailable in editor.
[midnight-commander.git] / lib / charsets.c
blob5cbe38fafcae4356af0ad6fa0271a7121507726d
/* Text conversion from one charset to another.

   Copyright (C) 2001 Walery Studennikov <despair@sama.ru>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
/** \file charsets.c
 *  \brief Source: Text conversion from one charset to another
 */
24 #include <config.h>
26 #ifdef HAVE_CHARSET
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
35 #include "lib/charsets.h"
37 /*** global variables ****************************************************************************/
39 GPtrArray *codepages = NULL;
41 unsigned char conv_displ[256];
42 unsigned char conv_input[256];
44 const char *cp_display = NULL;
45 const char *cp_source = NULL;
47 /*** file scope macro definitions ****************************************************************/
/* Codepage id used for a negative codepage index (see get_codepage_id ()) */
#define OTHER_8BIT "Other_8_bit"

/*
 * FIXME: This assumes that ASCII is always the first encoding
 * in mc.charsets
 */
#define CP_ASCII 0
57 /*** file scope type declarations ****************************************************************/
59 /*** file scope variables ************************************************************************/
61 /*** file scope functions ************************************************************************/
62 /* --------------------------------------------------------------------------------------------- */
64 static codepage_desc *
65 new_codepage_desc (const char *id, const char *name)
67 codepage_desc *desc;
69 desc = g_new (codepage_desc, 1);
70 desc->id = g_strdup (id);
71 desc->name = g_strdup (name);
73 return desc;
76 /* --------------------------------------------------------------------------------------------- */
78 static void
79 free_codepage_desc (gpointer data, gpointer user_data)
81 codepage_desc *desc = (codepage_desc *) data;
82 (void) user_data;
84 g_free (desc->id);
85 g_free (desc->name);
86 g_free (desc);
89 /* --------------------------------------------------------------------------------------------- */
90 /* returns display codepage */
92 static void
93 load_codepages_list_from_file (GPtrArray ** list, const char *fname)
95 FILE *f;
96 guint i;
97 char buf[BUF_MEDIUM];
98 char *default_codepage = NULL;
100 f = fopen (fname, "r");
101 if (f == NULL)
102 return;
104 for (i = 0; fgets (buf, sizeof buf, f) != NULL;)
106 /* split string into id and cpname */
107 char *p = buf;
108 size_t buflen = strlen (buf);
110 if (*p == '\n' || *p == '\0' || *p == '#')
111 continue;
113 if (buflen > 0 && buf[buflen - 1] == '\n')
114 buf[buflen - 1] = '\0';
115 while (*p != '\t' && *p != ' ' && *p != '\0')
116 ++p;
117 if (*p == '\0')
118 goto fail;
120 *p++ = '\0';
121 g_strstrip (p);
122 if (*p == '\0')
123 goto fail;
125 if (strcmp (buf, "default") == 0)
126 default_codepage = g_strdup (p);
127 else
129 const char *id = buf;
131 if (*list == NULL)
133 *list = g_ptr_array_sized_new (16);
134 g_ptr_array_add (*list, new_codepage_desc (id, p));
136 else
138 /* whether id is already present in list */
139 /* if yes, overwrite description */
140 for (i = 0; i < (*list)->len; i++)
142 codepage_desc *desc;
144 desc = (codepage_desc *) g_ptr_array_index (*list, i);
146 if (strcmp (id, desc->id) == 0)
148 /* found */
149 g_free (desc->name);
150 desc->name = g_strdup (p);
151 break;
155 /* not found */
156 if (i == (*list)->len)
157 g_ptr_array_add (*list, new_codepage_desc (id, p));
162 if (default_codepage != NULL)
164 mc_global.display_codepage = get_codepage_index (default_codepage);
165 g_free (default_codepage);
168 fail:
169 fclose (f);
172 /* --------------------------------------------------------------------------------------------- */
174 static char
175 translate_character (GIConv cd, char c)
177 gchar *tmp_buff = NULL;
178 gsize bytes_read, bytes_written = 0;
179 const char *ibuf = &c;
180 char ch = UNKNCHAR;
182 int ibuflen = 1;
184 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
185 if (tmp_buff)
186 ch = tmp_buff[0];
187 g_free (tmp_buff);
188 return ch;
191 /* --------------------------------------------------------------------------------------------- */
192 /*** public functions ****************************************************************************/
193 /* --------------------------------------------------------------------------------------------- */
195 void
196 load_codepages_list (void)
198 char *fname;
200 /* 1: try load /usr/share/mc/mc.charsets */
201 fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
202 load_codepages_list_from_file (&codepages, fname);
203 g_free (fname);
205 /* 2: try load /etc/mc/mc.charsets */
206 fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
207 load_codepages_list_from_file (&codepages, fname);
208 g_free (fname);
210 if (codepages == NULL)
212 /* files are not found, add defaullt codepage */
213 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
215 codepages = g_ptr_array_new ();
216 g_ptr_array_add (codepages, new_codepage_desc ("ASCII", _("7-bit ASCII")));
220 /* --------------------------------------------------------------------------------------------- */
222 void
223 free_codepages_list (void)
225 g_ptr_array_foreach (codepages, free_codepage_desc, NULL);
226 g_ptr_array_free (codepages, TRUE);
229 /* --------------------------------------------------------------------------------------------- */
231 const char *
232 get_codepage_id (const int n)
234 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
237 /* --------------------------------------------------------------------------------------------- */
240 get_codepage_index (const char *id)
242 size_t i;
243 if (strcmp (id, OTHER_8BIT) == 0)
244 return -1;
245 if (codepages == NULL)
246 return -1;
247 for (i = 0; i < codepages->len; i++)
248 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
249 return i;
250 return -1;
253 /* --------------------------------------------------------------------------------------------- */
254 /** Check if specified encoding can be used in mc.
255 * @param encoding name of encoding
256 * @returns TRUE if encoding has supported by mc, FALSE otherwise
259 gboolean
260 is_supported_encoding (const char *encoding)
262 gboolean result = FALSE;
263 guint t;
265 for (t = 0; t < codepages->len; t++)
267 const char *id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
268 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
271 return result;
274 /* --------------------------------------------------------------------------------------------- */
276 char *
277 init_translation_table (int cpsource, int cpdisplay)
279 int i;
280 GIConv cd;
282 /* Fill inpit <-> display tables */
284 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
286 for (i = 0; i <= 255; ++i)
288 conv_displ[i] = i;
289 conv_input[i] = i;
290 cp_source = cp_display;
292 return NULL;
295 for (i = 0; i <= 127; ++i)
297 conv_displ[i] = i;
298 conv_input[i] = i;
300 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
301 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
303 /* display <- inpit table */
305 cd = g_iconv_open (cp_display, cp_source);
306 if (cd == INVALID_CONV)
307 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
309 for (i = 128; i <= 255; ++i)
310 conv_displ[i] = translate_character (cd, i);
312 g_iconv_close (cd);
314 /* inpit <- display table */
316 cd = g_iconv_open (cp_source, cp_display);
317 if (cd == INVALID_CONV)
318 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
320 for (i = 128; i <= 255; ++i)
322 unsigned char ch;
323 ch = translate_character (cd, i);
324 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
327 g_iconv_close (cd);
329 return NULL;
332 /* --------------------------------------------------------------------------------------------- */
334 void
335 convert_to_display (char *str)
337 if (!str)
338 return;
340 while (*str)
342 *str = conv_displ[(unsigned char) *str];
343 str++;
347 /* --------------------------------------------------------------------------------------------- */
349 GString *
350 str_convert_to_display (char *str)
352 return str_nconvert_to_display (str, -1);
356 /* --------------------------------------------------------------------------------------------- */
358 GString *
359 str_nconvert_to_display (char *str, int len)
361 GString *buff;
362 GIConv conv;
364 if (!str)
365 return g_string_new ("");
367 if (cp_display == cp_source)
368 return g_string_new (str);
370 conv = str_crt_conv_from (cp_source);
372 buff = g_string_new ("");
373 str_nconvert (conv, str, len, buff);
374 str_close_conv (conv);
375 return buff;
378 /* --------------------------------------------------------------------------------------------- */
380 void
381 convert_from_input (char *str)
383 if (!str)
384 return;
386 while (*str)
388 *str = conv_input[(unsigned char) *str];
389 str++;
393 /* --------------------------------------------------------------------------------------------- */
395 GString *
396 str_convert_to_input (char *str)
398 return str_nconvert_to_input (str, -1);
401 /* --------------------------------------------------------------------------------------------- */
403 GString *
404 str_nconvert_to_input (char *str, int len)
406 GString *buff;
407 GIConv conv;
409 if (!str)
410 return g_string_new ("");
412 if (cp_display == cp_source)
413 return g_string_new (str);
415 conv = str_crt_conv_to (cp_source);
417 buff = g_string_new ("");
418 str_nconvert (conv, str, len, buff);
419 str_close_conv (conv);
420 return buff;
423 /* --------------------------------------------------------------------------------------------- */
425 unsigned char
426 convert_from_utf_to_current (const char *str)
428 unsigned char buf_ch[6 + 1];
429 unsigned char ch = '.';
430 GIConv conv;
431 const char *cp_to;
433 if (!str)
434 return '.';
436 cp_to = get_codepage_id (mc_global.source_codepage);
437 conv = str_crt_conv_to (cp_to);
439 if (conv != INVALID_CONV)
441 switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
443 case ESTR_SUCCESS:
444 ch = buf_ch[0];
445 break;
446 case ESTR_PROBLEM:
447 case ESTR_FAILURE:
448 ch = '.';
449 break;
451 str_close_conv (conv);
454 return ch;
458 /* --------------------------------------------------------------------------------------------- */
460 unsigned char
461 convert_from_utf_to_current_c (const int input_char, GIConv conv)
463 unsigned char str[6 + 1];
464 unsigned char buf_ch[6 + 1];
465 unsigned char ch = '.';
467 int res = 0;
469 res = g_unichar_to_utf8 (input_char, (char *) str);
470 if (res == 0)
472 return ch;
474 str[res] = '\0';
476 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
478 case ESTR_SUCCESS:
479 ch = buf_ch[0];
480 break;
481 case ESTR_PROBLEM:
482 case ESTR_FAILURE:
483 ch = '.';
484 break;
486 return ch;
489 /* --------------------------------------------------------------------------------------------- */
492 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
494 unsigned char str[2];
495 unsigned char buf_ch[6 + 1];
496 int ch = '.';
497 int res = 0;
499 str[0] = (unsigned char) input_char;
500 str[1] = '\0';
502 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
504 case ESTR_SUCCESS:
505 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
506 if (res < 0)
508 ch = buf_ch[0];
510 else
512 ch = res;
514 break;
515 case ESTR_PROBLEM:
516 case ESTR_FAILURE:
517 ch = '.';
518 break;
520 return ch;
523 /* --------------------------------------------------------------------------------------------- */
526 convert_from_8bit_to_utf_c2 (const char input_char)
528 unsigned char str[2];
529 unsigned char buf_ch[6 + 1];
530 int ch = '.';
531 int res = 0;
532 GIConv conv;
533 const char *cp_from;
535 str[0] = (unsigned char) input_char;
536 str[1] = '\0';
538 cp_from = get_codepage_id (mc_global.source_codepage);
539 conv = str_crt_conv_to (cp_from);
541 if (conv != INVALID_CONV)
543 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
545 case ESTR_SUCCESS:
546 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
547 if (res < 0)
549 ch = buf_ch[0];
551 else
553 ch = res;
555 break;
556 case ESTR_PROBLEM:
557 case ESTR_FAILURE:
558 ch = '.';
559 break;
561 str_close_conv (conv);
563 return ch;
567 /* --------------------------------------------------------------------------------------------- */
569 #endif /* HAVE_CHARSET */