mceditor: refactoring.
[midnight-commander.git] / lib / charsets.c
blobc5ccaea57ca8bbe6952f4059d46cd08dc3f1c3bd
1 /*
2 Text conversion from one charset to another.
4 Copyright (C) 2001-2024
5 Free Software Foundation, Inc.
7 Written by:
8 Walery Studennikov <despair@sama.ru>
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 /** \file charsets.c
27 * \brief Source: Text conversion from one charset to another
30 #include <config.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
36 #include "lib/global.h"
37 #include "lib/strutil.h" /* utf-8 functions */
38 #include "lib/fileloc.h"
39 #include "lib/util.h" /* whitespace() */
41 #include "lib/charsets.h"
43 /*** global variables ****************************************************************************/
/* List of known codepages (codepage_desc entries); filled by load_codepages_list() */
45 GPtrArray *codepages = NULL;
/* Per-byte translation tables, one entry for each of the 256 byte values;
   both are filled by init_translation_table() */
47 unsigned char conv_displ[256];
48 unsigned char conv_input[256];
/* Ids of the display and source codepages as selected by init_translation_table() */
50 const char *cp_display = NULL;
51 const char *cp_source = NULL;
53 /*** file scope macro definitions ****************************************************************/
/* Value returned by translate_character() when a byte cannot be converted */
55 #define UNKNCHAR '\001'
/* Codepage id reported by get_codepage_id() for a negative codepage index */
57 #define OTHER_8BIT "Other_8_bit"
59 /*** file scope type declarations ****************************************************************/
61 /*** forward declarations (file scope functions) *************************************************/
63 /*** file scope variables ************************************************************************/
65 /* --------------------------------------------------------------------------------------------- */
66 /*** file scope functions ************************************************************************/
67 /* --------------------------------------------------------------------------------------------- */
69 static codepage_desc *
70 new_codepage_desc (const char *id, const char *name)
72 codepage_desc *desc;
74 desc = g_new (codepage_desc, 1);
75 desc->id = g_strdup (id);
76 desc->name = g_strdup (name);
78 return desc;
81 /* --------------------------------------------------------------------------------------------- */
83 static void
84 free_codepage_desc (gpointer data)
86 codepage_desc *desc = (codepage_desc *) data;
88 g_free (desc->id);
89 g_free (desc->name);
90 g_free (desc);
93 /* --------------------------------------------------------------------------------------------- */
94 /* returns display codepage */
96 static void
97 load_codepages_list_from_file (GPtrArray ** list, const char *fname)
99 FILE *f;
100 char buf[BUF_MEDIUM];
101 char *default_codepage = NULL;
103 f = fopen (fname, "r");
104 if (f == NULL)
105 return;
107 while (fgets (buf, sizeof buf, f) != NULL)
109 /* split string into id and cpname */
110 char *p = buf;
111 size_t buflen;
113 if (*p == '\n' || *p == '\0' || *p == '#')
114 continue;
116 buflen = strlen (buf);
118 if (buflen != 0 && buf[buflen - 1] == '\n')
119 buf[buflen - 1] = '\0';
120 while (*p != '\0' && !whitespace (*p))
121 ++p;
122 if (*p == '\0')
123 goto fail;
125 *p++ = '\0';
126 g_strstrip (p);
127 if (*p == '\0')
128 goto fail;
130 if (strcmp (buf, "default") == 0)
131 default_codepage = g_strdup (p);
132 else
134 const char *id = buf;
136 if (*list == NULL)
138 *list = g_ptr_array_sized_new (16);
139 g_ptr_array_set_free_func (*list, free_codepage_desc);
140 g_ptr_array_add (*list, new_codepage_desc (id, p));
142 else
144 unsigned int i;
146 /* whether id is already present in list */
147 /* if yes, overwrite description */
148 for (i = 0; i < (*list)->len; i++)
150 codepage_desc *desc;
152 desc = (codepage_desc *) g_ptr_array_index (*list, i);
154 if (strcmp (id, desc->id) == 0)
156 /* found */
157 g_free (desc->name);
158 desc->name = g_strdup (p);
159 break;
163 /* not found */
164 if (i == (*list)->len)
165 g_ptr_array_add (*list, new_codepage_desc (id, p));
170 if (default_codepage != NULL)
172 mc_global.display_codepage = get_codepage_index (default_codepage);
173 g_free (default_codepage);
176 fail:
177 fclose (f);
180 /* --------------------------------------------------------------------------------------------- */
182 static char
183 translate_character (GIConv cd, char c)
185 gchar *tmp_buff = NULL;
186 gsize bytes_read, bytes_written = 0;
187 const char *ibuf = &c;
188 char ch = UNKNCHAR;
189 int ibuflen = 1;
191 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
192 if (tmp_buff != NULL)
193 ch = tmp_buff[0];
194 g_free (tmp_buff);
195 return ch;
198 /* --------------------------------------------------------------------------------------------- */
199 /*** public functions ****************************************************************************/
200 /* --------------------------------------------------------------------------------------------- */
202 void
203 load_codepages_list (void)
205 char *fname;
207 /* 1: try load /usr/share/mc/mc.charsets */
208 fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
209 load_codepages_list_from_file (&codepages, fname);
210 g_free (fname);
212 /* 2: try load /etc/mc/mc.charsets */
213 fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
214 load_codepages_list_from_file (&codepages, fname);
215 g_free (fname);
217 if (codepages == NULL)
219 /* files are not found, add default codepage */
220 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
222 codepages = g_ptr_array_new_with_free_func (free_codepage_desc);
223 g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _("7-bit ASCII")));
227 /* --------------------------------------------------------------------------------------------- */
229 void
230 free_codepages_list (void)
232 g_ptr_array_free (codepages, TRUE);
233 /* NULL-ize pointer to make unit tests happy */
234 codepages = NULL;
237 /* --------------------------------------------------------------------------------------------- */
239 const char *
240 get_codepage_id (const int n)
242 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
245 /* --------------------------------------------------------------------------------------------- */
248 get_codepage_index (const char *id)
250 size_t i;
252 if (codepages == NULL)
253 return -1;
254 if (strcmp (id, OTHER_8BIT) == 0)
255 return -1;
256 for (i = 0; i < codepages->len; i++)
257 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
258 return i;
259 return -1;
262 /* --------------------------------------------------------------------------------------------- */
263 /** Check if specified encoding can be used in mc.
264 * @param encoding name of encoding
265 * @return TRUE if encoding is supported by mc, FALSE otherwise
268 gboolean
269 is_supported_encoding (const char *encoding)
271 gboolean result = FALSE;
272 guint t;
274 for (t = 0; t < codepages->len; t++)
276 const char *id;
278 id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
279 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
282 return result;
285 /* --------------------------------------------------------------------------------------------- */
287 char *
288 init_translation_table (int cpsource, int cpdisplay)
290 int i;
291 GIConv cd;
293 /* Fill inpit <-> display tables */
295 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
297 for (i = 0; i <= 255; ++i)
299 conv_displ[i] = i;
300 conv_input[i] = i;
302 cp_source = cp_display;
303 return NULL;
306 for (i = 0; i <= 127; ++i)
308 conv_displ[i] = i;
309 conv_input[i] = i;
311 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
312 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
314 /* display <- inpit table */
316 cd = g_iconv_open (cp_display, cp_source);
317 if (cd == INVALID_CONV)
318 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
320 for (i = 128; i <= 255; ++i)
321 conv_displ[i] = translate_character (cd, i);
323 g_iconv_close (cd);
325 /* inpit <- display table */
327 cd = g_iconv_open (cp_source, cp_display);
328 if (cd == INVALID_CONV)
329 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
331 for (i = 128; i <= 255; ++i)
333 unsigned char ch;
334 ch = translate_character (cd, i);
335 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
338 g_iconv_close (cd);
340 return NULL;
343 /* --------------------------------------------------------------------------------------------- */
345 void
346 convert_to_display (char *str)
348 if (str != NULL)
349 for (; *str != '\0'; str++)
350 *str = conv_displ[(unsigned char) *str];
353 /* --------------------------------------------------------------------------------------------- */
355 GString *
356 str_nconvert_to_display (const char *str, int len)
358 GString *buff;
359 GIConv conv;
361 if (str == NULL)
362 return NULL;
364 if (cp_display == cp_source)
365 return g_string_new (str);
367 conv = str_crt_conv_from (cp_source);
369 buff = g_string_new ("");
370 str_nconvert (conv, str, len, buff);
371 str_close_conv (conv);
372 return buff;
375 /* --------------------------------------------------------------------------------------------- */
377 void
378 convert_from_input (char *str)
380 if (str != NULL)
381 for (; *str != '\0'; str++)
382 *str = conv_input[(unsigned char) *str];
385 /* --------------------------------------------------------------------------------------------- */
387 GString *
388 str_nconvert_to_input (const char *str, int len)
390 GString *buff;
391 GIConv conv;
393 if (str == NULL)
394 return NULL;
396 if (cp_display == cp_source)
397 return g_string_new (str);
399 conv = str_crt_conv_to (cp_source);
401 buff = g_string_new ("");
402 str_nconvert (conv, str, len, buff);
403 str_close_conv (conv);
404 return buff;
407 /* --------------------------------------------------------------------------------------------- */
409 unsigned char
410 convert_from_utf_to_current (const char *str)
412 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
413 unsigned char ch = '.';
414 GIConv conv;
415 const char *cp_to;
417 if (str == NULL)
418 return '.';
420 cp_to = get_codepage_id (mc_global.source_codepage);
421 conv = str_crt_conv_to (cp_to);
423 if (conv != INVALID_CONV)
425 switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
427 case ESTR_SUCCESS:
428 ch = buf_ch[0];
429 break;
430 case ESTR_PROBLEM:
431 case ESTR_FAILURE:
432 ch = '.';
433 break;
434 default:
435 break;
437 str_close_conv (conv);
440 return ch;
443 /* --------------------------------------------------------------------------------------------- */
445 unsigned char
446 convert_from_utf_to_current_c (int input_char, GIConv conv)
448 unsigned char str[UTF8_CHAR_LEN + 1];
449 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
450 unsigned char ch = '.';
451 int res;
453 res = g_unichar_to_utf8 (input_char, (char *) str);
454 if (res == 0)
455 return ch;
457 str[res] = '\0';
459 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
461 case ESTR_SUCCESS:
462 ch = buf_ch[0];
463 break;
464 case ESTR_PROBLEM:
465 case ESTR_FAILURE:
466 ch = '.';
467 break;
468 default:
469 break;
472 return ch;
475 /* --------------------------------------------------------------------------------------------- */
478 convert_from_8bit_to_utf_c (char input_char, GIConv conv)
480 unsigned char str[2];
481 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
482 int ch;
484 str[0] = (unsigned char) input_char;
485 str[1] = '\0';
487 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
489 case ESTR_SUCCESS:
491 int res;
493 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
494 ch = res >= 0 ? res : buf_ch[0];
495 break;
497 case ESTR_PROBLEM:
498 case ESTR_FAILURE:
499 default:
500 ch = '.';
501 break;
504 return ch;
507 /* --------------------------------------------------------------------------------------------- */
510 convert_from_8bit_to_utf_c2 (char input_char)
512 int ch = '.';
513 GIConv conv;
514 const char *cp_from;
516 cp_from = get_codepage_id (mc_global.source_codepage);
518 conv = str_crt_conv_to (cp_from);
519 if (conv != INVALID_CONV)
521 ch = convert_from_8bit_to_utf_c (input_char, conv);
522 str_close_conv (conv);
525 return ch;
528 /* --------------------------------------------------------------------------------------------- */