glibcompat: remove g_direct_equal shim
[midnight-commander.git] / lib / charsets.c
blob0df5ecb21032e7ee360a7fbf31e07479dafa8140
/*
   Text conversion from one charset to another.

   Copyright (C) 2001-2024
   Free Software Foundation, Inc.

   Written by:
   Walery Studennikov <despair@sama.ru>

   This file is part of the Midnight Commander.

   The Midnight Commander is free software: you can redistribute it
   and/or modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation, either version 3 of the License,
   or (at your option) any later version.

   The Midnight Commander is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
/** \file charsets.c
 *  \brief Source: Text conversion from one charset to another
 */
30 #include <config.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
36 #include "lib/global.h"
37 #include "lib/strutil.h" /* utf-8 functions */
38 #include "lib/fileloc.h"
39 #include "lib/util.h" /* whitespace() */
41 #include "lib/charsets.h"
43 /*** global variables ****************************************************************************/
45 GPtrArray *codepages = NULL;
47 unsigned char conv_displ[256];
48 unsigned char conv_input[256];
50 const char *cp_display = NULL;
51 const char *cp_source = NULL;
53 /*** file scope macro definitions ****************************************************************/
55 #define UNKNCHAR '\001'
57 #define OTHER_8BIT "Other_8_bit"
59 /*** file scope type declarations ****************************************************************/
61 /*** forward declarations (file scope functions) *************************************************/
63 /*** file scope variables ************************************************************************/
65 /* --------------------------------------------------------------------------------------------- */
66 /*** file scope functions ************************************************************************/
67 /* --------------------------------------------------------------------------------------------- */
69 static codepage_desc *
70 new_codepage_desc (const char *id, const char *name)
72 codepage_desc *desc;
74 desc = g_new (codepage_desc, 1);
75 desc->id = g_strdup (id);
76 desc->name = g_strdup (name);
78 return desc;
81 /* --------------------------------------------------------------------------------------------- */
83 static void
84 free_codepage_desc (gpointer data)
86 codepage_desc *desc = (codepage_desc *) data;
88 g_free (desc->id);
89 g_free (desc->name);
90 g_free (desc);
93 /* --------------------------------------------------------------------------------------------- */
94 /* returns display codepage */
96 static void
97 load_codepages_list_from_file (GPtrArray **list, const char *fname)
99 FILE *f;
100 char buf[BUF_MEDIUM];
101 char *default_codepage = NULL;
103 f = fopen (fname, "r");
104 if (f == NULL)
105 return;
107 while (fgets (buf, sizeof buf, f) != NULL)
109 /* split string into id and cpname */
110 char *p = buf;
111 size_t buflen;
113 if (*p == '\n' || *p == '\0' || *p == '#')
114 continue;
116 buflen = strlen (buf);
118 if (buflen != 0 && buf[buflen - 1] == '\n')
119 buf[buflen - 1] = '\0';
120 while (*p != '\0' && !whitespace (*p))
121 ++p;
122 if (*p == '\0')
123 goto fail;
125 *p++ = '\0';
126 g_strstrip (p);
127 if (*p == '\0')
128 goto fail;
130 if (strcmp (buf, "default") == 0)
131 default_codepage = g_strdup (p);
132 else
134 const char *id = buf;
136 if (*list == NULL)
138 *list = g_ptr_array_new_full (16, free_codepage_desc);
139 g_ptr_array_add (*list, new_codepage_desc (id, p));
141 else
143 unsigned int i;
145 /* whether id is already present in list */
146 /* if yes, overwrite description */
147 for (i = 0; i < (*list)->len; i++)
149 codepage_desc *desc;
151 desc = (codepage_desc *) g_ptr_array_index (*list, i);
153 if (strcmp (id, desc->id) == 0)
155 /* found */
156 g_free (desc->name);
157 desc->name = g_strdup (p);
158 break;
162 /* not found */
163 if (i == (*list)->len)
164 g_ptr_array_add (*list, new_codepage_desc (id, p));
169 if (default_codepage != NULL)
171 mc_global.display_codepage = get_codepage_index (default_codepage);
172 g_free (default_codepage);
175 fail:
176 fclose (f);
179 /* --------------------------------------------------------------------------------------------- */
181 static char
182 translate_character (GIConv cd, char c)
184 gchar *tmp_buff = NULL;
185 gsize bytes_read, bytes_written = 0;
186 const char *ibuf = &c;
187 char ch = UNKNCHAR;
188 int ibuflen = 1;
190 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
191 if (tmp_buff != NULL)
192 ch = tmp_buff[0];
193 g_free (tmp_buff);
194 return ch;
197 /* --------------------------------------------------------------------------------------------- */
198 /*** public functions ****************************************************************************/
199 /* --------------------------------------------------------------------------------------------- */
201 void
202 load_codepages_list (void)
204 char *fname;
206 /* 1: try load /usr/share/mc/mc.charsets */
207 fname = g_build_filename (mc_global.share_data_dir, CHARSETS_LIST, (char *) NULL);
208 load_codepages_list_from_file (&codepages, fname);
209 g_free (fname);
211 /* 2: try load /etc/mc/mc.charsets */
212 fname = g_build_filename (mc_global.sysconfig_dir, CHARSETS_LIST, (char *) NULL);
213 load_codepages_list_from_file (&codepages, fname);
214 g_free (fname);
216 if (codepages == NULL)
218 /* files are not found, add default codepage */
219 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
221 codepages = g_ptr_array_new_with_free_func (free_codepage_desc);
222 g_ptr_array_add (codepages, new_codepage_desc (DEFAULT_CHARSET, _("7-bit ASCII")));
226 /* --------------------------------------------------------------------------------------------- */
228 void
229 free_codepages_list (void)
231 g_ptr_array_free (codepages, TRUE);
232 /* NULL-ize pointer to make unit tests happy */
233 codepages = NULL;
236 /* --------------------------------------------------------------------------------------------- */
238 const char *
239 get_codepage_id (const int n)
241 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
244 /* --------------------------------------------------------------------------------------------- */
247 get_codepage_index (const char *id)
249 size_t i;
251 if (codepages == NULL)
252 return -1;
253 if (strcmp (id, OTHER_8BIT) == 0)
254 return -1;
255 for (i = 0; i < codepages->len; i++)
256 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
257 return i;
258 return -1;
261 /* --------------------------------------------------------------------------------------------- */
262 /** Check if specified encoding can be used in mc.
263 * @param encoding name of encoding
264 * @return TRUE if encoding is supported by mc, FALSE otherwise
267 gboolean
268 is_supported_encoding (const char *encoding)
270 gboolean result = FALSE;
271 guint t;
273 for (t = 0; t < codepages->len; t++)
275 const char *id;
277 id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
278 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
281 return result;
284 /* --------------------------------------------------------------------------------------------- */
286 char *
287 init_translation_table (int cpsource, int cpdisplay)
289 int i;
290 GIConv cd;
292 /* Fill inpit <-> display tables */
294 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
296 for (i = 0; i <= 255; ++i)
298 conv_displ[i] = i;
299 conv_input[i] = i;
301 cp_source = cp_display;
302 return NULL;
305 for (i = 0; i <= 127; ++i)
307 conv_displ[i] = i;
308 conv_input[i] = i;
310 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
311 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
313 /* display <- inpit table */
315 cd = g_iconv_open (cp_display, cp_source);
316 if (cd == INVALID_CONV)
317 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
319 for (i = 128; i <= 255; ++i)
320 conv_displ[i] = translate_character (cd, i);
322 g_iconv_close (cd);
324 /* inpit <- display table */
326 cd = g_iconv_open (cp_source, cp_display);
327 if (cd == INVALID_CONV)
328 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
330 for (i = 128; i <= 255; ++i)
332 unsigned char ch;
333 ch = translate_character (cd, i);
334 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
337 g_iconv_close (cd);
339 return NULL;
342 /* --------------------------------------------------------------------------------------------- */
344 void
345 convert_to_display (char *str)
347 if (str != NULL)
348 for (; *str != '\0'; str++)
349 *str = conv_displ[(unsigned char) *str];
352 /* --------------------------------------------------------------------------------------------- */
354 GString *
355 str_nconvert_to_display (const char *str, int len)
357 GString *buff;
358 GIConv conv;
360 if (str == NULL)
361 return NULL;
363 if (cp_display == cp_source)
364 return g_string_new (str);
366 conv = str_crt_conv_from (cp_source);
368 buff = g_string_new ("");
369 str_nconvert (conv, str, len, buff);
370 str_close_conv (conv);
371 return buff;
374 /* --------------------------------------------------------------------------------------------- */
376 void
377 convert_from_input (char *str)
379 if (str != NULL)
380 for (; *str != '\0'; str++)
381 *str = conv_input[(unsigned char) *str];
384 /* --------------------------------------------------------------------------------------------- */
386 GString *
387 str_nconvert_to_input (const char *str, int len)
389 GString *buff;
390 GIConv conv;
392 if (str == NULL)
393 return NULL;
395 if (cp_display == cp_source)
396 return g_string_new (str);
398 conv = str_crt_conv_to (cp_source);
400 buff = g_string_new ("");
401 str_nconvert (conv, str, len, buff);
402 str_close_conv (conv);
403 return buff;
406 /* --------------------------------------------------------------------------------------------- */
408 unsigned char
409 convert_from_utf_to_current (const char *str)
411 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
412 unsigned char ch = '.';
413 GIConv conv;
414 const char *cp_to;
416 if (str == NULL)
417 return '.';
419 cp_to = get_codepage_id (mc_global.source_codepage);
420 conv = str_crt_conv_to (cp_to);
422 if (conv != INVALID_CONV)
424 switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
426 case ESTR_SUCCESS:
427 ch = buf_ch[0];
428 break;
429 case ESTR_PROBLEM:
430 case ESTR_FAILURE:
431 ch = '.';
432 break;
433 default:
434 break;
436 str_close_conv (conv);
439 return ch;
442 /* --------------------------------------------------------------------------------------------- */
444 unsigned char
445 convert_from_utf_to_current_c (int input_char, GIConv conv)
447 unsigned char str[UTF8_CHAR_LEN + 1];
448 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
449 unsigned char ch = '.';
450 int res;
452 res = g_unichar_to_utf8 (input_char, (char *) str);
453 if (res == 0)
454 return ch;
456 str[res] = '\0';
458 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
460 case ESTR_SUCCESS:
461 ch = buf_ch[0];
462 break;
463 case ESTR_PROBLEM:
464 case ESTR_FAILURE:
465 ch = '.';
466 break;
467 default:
468 break;
471 return ch;
474 /* --------------------------------------------------------------------------------------------- */
477 convert_from_8bit_to_utf_c (char input_char, GIConv conv)
479 unsigned char str[2];
480 unsigned char buf_ch[UTF8_CHAR_LEN + 1];
481 int ch;
483 str[0] = (unsigned char) input_char;
484 str[1] = '\0';
486 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
488 case ESTR_SUCCESS:
490 int res;
492 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
493 ch = res >= 0 ? res : buf_ch[0];
494 break;
496 case ESTR_PROBLEM:
497 case ESTR_FAILURE:
498 default:
499 ch = '.';
500 break;
503 return ch;
506 /* --------------------------------------------------------------------------------------------- */
509 convert_from_8bit_to_utf_c2 (char input_char)
511 int ch = '.';
512 GIConv conv;
513 const char *cp_from;
515 cp_from = get_codepage_id (mc_global.source_codepage);
517 conv = str_crt_conv_to (cp_from);
518 if (conv != INVALID_CONV)
520 ch = convert_from_8bit_to_utf_c (input_char, conv);
521 str_close_conv (conv);
524 return ch;
527 /* --------------------------------------------------------------------------------------------- */