Updated doc/NEWS file
[midnight-commander.git] / lib / charsets.c
blob63583a4dc1e8527a75834c751fde3384394adcc4
1 /*
2 Text conversion from one charset to another.
4 Copyright (C) 2001, 2011
5 The Free Software Foundation, Inc.
7 Written by:
8 Walery Studennikov <despair@sama.ru>
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 /** \file charsets.c
27 * \brief Source: Text conversion from one charset to another
30 #include <config.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
36 #include "lib/global.h"
37 #include "lib/strutil.h" /* utf-8 functions */
38 #include "lib/fileloc.h"
39 #include "lib/charsets.h"
41 /*** global variables ****************************************************************************/
43 GPtrArray *codepages = NULL;
45 unsigned char conv_displ[256];
46 unsigned char conv_input[256];
48 const char *cp_display = NULL;
49 const char *cp_source = NULL;
51 /*** file scope macro definitions ****************************************************************/
53 #define UNKNCHAR '\001'
55 #define OTHER_8BIT "Other_8_bit"
58 * FIXME: This assumes that ASCII is always the first encoding
59 * in mc.charsets
61 #define CP_ASCII 0
63 /*** file scope type declarations ****************************************************************/
65 /*** file scope variables ************************************************************************/
67 /*** file scope functions ************************************************************************/
68 /* --------------------------------------------------------------------------------------------- */
70 static codepage_desc *
71 new_codepage_desc (const char *id, const char *name)
73 codepage_desc *desc;
75 desc = g_new (codepage_desc, 1);
76 desc->id = g_strdup (id);
77 desc->name = g_strdup (name);
79 return desc;
82 /* --------------------------------------------------------------------------------------------- */
84 static void
85 free_codepage_desc (gpointer data, gpointer user_data)
87 codepage_desc *desc = (codepage_desc *) data;
88 (void) user_data;
90 g_free (desc->id);
91 g_free (desc->name);
92 g_free (desc);
95 /* --------------------------------------------------------------------------------------------- */
96 /* returns display codepage */
98 static void
99 load_codepages_list_from_file (GPtrArray ** list, const char *fname)
101 FILE *f;
102 char buf[BUF_MEDIUM];
103 char *default_codepage = NULL;
105 f = fopen (fname, "r");
106 if (f == NULL)
107 return;
109 while (fgets (buf, sizeof buf, f) != NULL)
111 /* split string into id and cpname */
112 char *p = buf;
113 size_t buflen = strlen (buf);
115 if (*p == '\n' || *p == '\0' || *p == '#')
116 continue;
118 if (buflen > 0 && buf[buflen - 1] == '\n')
119 buf[buflen - 1] = '\0';
120 while (*p != '\t' && *p != ' ' && *p != '\0')
121 ++p;
122 if (*p == '\0')
123 goto fail;
125 *p++ = '\0';
126 g_strstrip (p);
127 if (*p == '\0')
128 goto fail;
130 if (strcmp (buf, "default") == 0)
131 default_codepage = g_strdup (p);
132 else
134 const char *id = buf;
136 if (*list == NULL)
138 *list = g_ptr_array_sized_new (16);
139 g_ptr_array_add (*list, new_codepage_desc (id, p));
141 else
143 unsigned int i;
145 /* whether id is already present in list */
146 /* if yes, overwrite description */
147 for (i = 0; i < (*list)->len; i++)
149 codepage_desc *desc;
151 desc = (codepage_desc *) g_ptr_array_index (*list, i);
153 if (strcmp (id, desc->id) == 0)
155 /* found */
156 g_free (desc->name);
157 desc->name = g_strdup (p);
158 break;
162 /* not found */
163 if (i == (*list)->len)
164 g_ptr_array_add (*list, new_codepage_desc (id, p));
169 if (default_codepage != NULL)
171 mc_global.display_codepage = get_codepage_index (default_codepage);
172 g_free (default_codepage);
175 fail:
176 fclose (f);
179 /* --------------------------------------------------------------------------------------------- */
181 static char
182 translate_character (GIConv cd, char c)
184 gchar *tmp_buff = NULL;
185 gsize bytes_read, bytes_written = 0;
186 const char *ibuf = &c;
187 char ch = UNKNCHAR;
189 int ibuflen = 1;
191 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
192 if (tmp_buff)
193 ch = tmp_buff[0];
194 g_free (tmp_buff);
195 return ch;
198 /* --------------------------------------------------------------------------------------------- */
199 /*** public functions ****************************************************************************/
200 /* --------------------------------------------------------------------------------------------- */
202 void
203 load_codepages_list (void)
205 char *fname;
207 /* 1: try load /usr/share/mc/mc.charsets */
208 fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
209 load_codepages_list_from_file (&codepages, fname);
210 g_free (fname);
212 /* 2: try load /etc/mc/mc.charsets */
213 fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
214 load_codepages_list_from_file (&codepages, fname);
215 g_free (fname);
217 if (codepages == NULL)
219 /* files are not found, add defaullt codepage */
220 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
222 codepages = g_ptr_array_new ();
223 g_ptr_array_add (codepages, new_codepage_desc ("ASCII", _("7-bit ASCII")));
227 /* --------------------------------------------------------------------------------------------- */
229 void
230 free_codepages_list (void)
232 g_ptr_array_foreach (codepages, free_codepage_desc, NULL);
233 g_ptr_array_free (codepages, TRUE);
236 /* --------------------------------------------------------------------------------------------- */
238 const char *
239 get_codepage_id (const int n)
241 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
244 /* --------------------------------------------------------------------------------------------- */
247 get_codepage_index (const char *id)
249 size_t i;
250 if (strcmp (id, OTHER_8BIT) == 0)
251 return -1;
252 if (codepages == NULL)
253 return -1;
254 for (i = 0; i < codepages->len; i++)
255 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
256 return i;
257 return -1;
260 /* --------------------------------------------------------------------------------------------- */
261 /** Check if specified encoding can be used in mc.
262 * @param encoding name of encoding
263 * @return TRUE if encoding is supported by mc, FALSE otherwise
266 gboolean
267 is_supported_encoding (const char *encoding)
269 gboolean result = FALSE;
270 guint t;
272 for (t = 0; t < codepages->len; t++)
274 const char *id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
275 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
278 return result;
281 /* --------------------------------------------------------------------------------------------- */
283 char *
284 init_translation_table (int cpsource, int cpdisplay)
286 int i;
287 GIConv cd;
289 /* Fill inpit <-> display tables */
291 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
293 for (i = 0; i <= 255; ++i)
295 conv_displ[i] = i;
296 conv_input[i] = i;
297 cp_source = cp_display;
299 return NULL;
302 for (i = 0; i <= 127; ++i)
304 conv_displ[i] = i;
305 conv_input[i] = i;
307 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
308 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
310 /* display <- inpit table */
312 cd = g_iconv_open (cp_display, cp_source);
313 if (cd == INVALID_CONV)
314 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
316 for (i = 128; i <= 255; ++i)
317 conv_displ[i] = translate_character (cd, i);
319 g_iconv_close (cd);
321 /* inpit <- display table */
323 cd = g_iconv_open (cp_source, cp_display);
324 if (cd == INVALID_CONV)
325 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
327 for (i = 128; i <= 255; ++i)
329 unsigned char ch;
330 ch = translate_character (cd, i);
331 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
334 g_iconv_close (cd);
336 return NULL;
339 /* --------------------------------------------------------------------------------------------- */
341 void
342 convert_to_display (char *str)
344 if (!str)
345 return;
347 while (*str)
349 *str = conv_displ[(unsigned char) *str];
350 str++;
354 /* --------------------------------------------------------------------------------------------- */
356 GString *
357 str_convert_to_display (char *str)
359 return str_nconvert_to_display (str, -1);
363 /* --------------------------------------------------------------------------------------------- */
365 GString *
366 str_nconvert_to_display (char *str, int len)
368 GString *buff;
369 GIConv conv;
371 if (!str)
372 return g_string_new ("");
374 if (cp_display == cp_source)
375 return g_string_new (str);
377 conv = str_crt_conv_from (cp_source);
379 buff = g_string_new ("");
380 str_nconvert (conv, str, len, buff);
381 str_close_conv (conv);
382 return buff;
385 /* --------------------------------------------------------------------------------------------- */
387 void
388 convert_from_input (char *str)
390 if (!str)
391 return;
393 while (*str)
395 *str = conv_input[(unsigned char) *str];
396 str++;
400 /* --------------------------------------------------------------------------------------------- */
402 GString *
403 str_convert_to_input (char *str)
405 return str_nconvert_to_input (str, -1);
408 /* --------------------------------------------------------------------------------------------- */
410 GString *
411 str_nconvert_to_input (char *str, int len)
413 GString *buff;
414 GIConv conv;
416 if (!str)
417 return g_string_new ("");
419 if (cp_display == cp_source)
420 return g_string_new (str);
422 conv = str_crt_conv_to (cp_source);
424 buff = g_string_new ("");
425 str_nconvert (conv, str, len, buff);
426 str_close_conv (conv);
427 return buff;
430 /* --------------------------------------------------------------------------------------------- */
432 unsigned char
433 convert_from_utf_to_current (const char *str)
435 unsigned char buf_ch[6 + 1];
436 unsigned char ch = '.';
437 GIConv conv;
438 const char *cp_to;
440 if (!str)
441 return '.';
443 cp_to = get_codepage_id (mc_global.source_codepage);
444 conv = str_crt_conv_to (cp_to);
446 if (conv != INVALID_CONV)
448 switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
450 case ESTR_SUCCESS:
451 ch = buf_ch[0];
452 break;
453 case ESTR_PROBLEM:
454 case ESTR_FAILURE:
455 ch = '.';
456 break;
458 str_close_conv (conv);
461 return ch;
465 /* --------------------------------------------------------------------------------------------- */
467 unsigned char
468 convert_from_utf_to_current_c (const int input_char, GIConv conv)
470 unsigned char str[6 + 1];
471 unsigned char buf_ch[6 + 1];
472 unsigned char ch = '.';
474 int res = 0;
476 res = g_unichar_to_utf8 (input_char, (char *) str);
477 if (res == 0)
479 return ch;
481 str[res] = '\0';
483 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
485 case ESTR_SUCCESS:
486 ch = buf_ch[0];
487 break;
488 case ESTR_PROBLEM:
489 case ESTR_FAILURE:
490 ch = '.';
491 break;
493 return ch;
496 /* --------------------------------------------------------------------------------------------- */
499 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
501 unsigned char str[2];
502 unsigned char buf_ch[6 + 1];
503 int ch = '.';
504 int res = 0;
506 str[0] = (unsigned char) input_char;
507 str[1] = '\0';
509 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
511 case ESTR_SUCCESS:
512 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
513 if (res < 0)
515 ch = buf_ch[0];
517 else
519 ch = res;
521 break;
522 case ESTR_PROBLEM:
523 case ESTR_FAILURE:
524 ch = '.';
525 break;
527 return ch;
530 /* --------------------------------------------------------------------------------------------- */
533 convert_from_8bit_to_utf_c2 (const char input_char)
535 unsigned char str[2];
536 unsigned char buf_ch[6 + 1];
537 int ch = '.';
538 int res = 0;
539 GIConv conv;
540 const char *cp_from;
542 str[0] = (unsigned char) input_char;
543 str[1] = '\0';
545 cp_from = get_codepage_id (mc_global.source_codepage);
546 conv = str_crt_conv_to (cp_from);
548 if (conv != INVALID_CONV)
550 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
552 case ESTR_SUCCESS:
553 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
554 if (res < 0)
556 ch = buf_ch[0];
558 else
560 ch = res;
562 break;
563 case ESTR_PROBLEM:
564 case ESTR_FAILURE:
565 ch = '.';
566 break;
568 str_close_conv (conv);
570 return ch;
574 /* --------------------------------------------------------------------------------------------- */