Fixed searching the start of word
[midnight-commander.git] / lib / charsets.c
blobaef56ccd61a2a59cd176f3b28d3bf1a99712399b
1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 /** \file charsets.c
21 * \brief Source: Text conversion from one charset to another
24 #include <config.h>
26 #ifdef HAVE_CHARSET
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
35 #include "lib/charsets.h"
37 #include "src/main.h"
39 /*** global variables ****************************************************************************/
41 GPtrArray *codepages = NULL; /* list of codepage_desc pointers; created by load_codepages_list() */
43 unsigned char conv_displ[256]; /* byte lookup table applied by convert_to_display() */
44 unsigned char conv_input[256]; /* byte lookup table applied by convert_from_input() */
46 const char *cp_display = NULL; /* id of the display codepage, set by init_translation_table() */
47 const char *cp_source = NULL; /* id of the source codepage, set by init_translation_table() */
49 /*** file scope macro definitions ****************************************************************/
51 #define OTHER_8BIT "Other_8_bit"
54 * FIXME: This assumes that ASCII is always the first encoding
55 * in mc.charsets
57 #define CP_ASCII 0
59 /*** file scope type declarations ****************************************************************/
61 /*** file scope variables ************************************************************************/
63 /*** file scope functions ************************************************************************/
64 /* --------------------------------------------------------------------------------------------- */
66 static codepage_desc *
67 new_codepage_desc (const char *id, const char *name)
69 codepage_desc *desc;
71 desc = g_new (codepage_desc, 1);
72 desc->id = g_strdup (id);
73 desc->name = g_strdup (name);
75 return desc;
78 /* --------------------------------------------------------------------------------------------- */
80 static void
81 free_codepage_desc (gpointer data, gpointer user_data)
83 codepage_desc *desc = (codepage_desc *) data;
84 (void) user_data;
86 g_free (desc->id);
87 g_free (desc->name);
88 g_free (desc);
91 /* --------------------------------------------------------------------------------------------- */
92 /* returns display codepage */
94 static void
95 load_codepages_list_from_file (GPtrArray ** list, const char *fname)
97 FILE *f;
98 guint i;
99 char buf[BUF_MEDIUM];
100 char *default_codepage = NULL;
102 f = fopen (fname, "r");
103 if (f == NULL)
104 return;
106 for (i = 0; fgets (buf, sizeof buf, f) != NULL;)
108 /* split string into id and cpname */
109 char *p = buf;
110 size_t buflen = strlen (buf);
112 if (*p == '\n' || *p == '\0' || *p == '#')
113 continue;
115 if (buflen > 0 && buf[buflen - 1] == '\n')
116 buf[buflen - 1] = '\0';
117 while (*p != '\t' && *p != ' ' && *p != '\0')
118 ++p;
119 if (*p == '\0')
120 goto fail;
122 *p++ = '\0';
123 g_strstrip (p);
124 if (*p == '\0')
125 goto fail;
127 if (strcmp (buf, "default") == 0)
128 default_codepage = g_strdup (p);
129 else
131 const char *id = buf;
133 if (*list == NULL)
135 *list = g_ptr_array_sized_new (16);
136 g_ptr_array_add (*list, new_codepage_desc (id, p));
138 else
140 /* whether id is already present in list */
141 /* if yes, overwrite description */
142 for (i = 0; i < (*list)->len; i++)
144 codepage_desc *desc;
146 desc = (codepage_desc *) g_ptr_array_index (*list, i);
148 if (strcmp (id, desc->id) == 0)
150 /* found */
151 g_free (desc->name);
152 desc->name = g_strdup (p);
153 break;
157 /* not found */
158 if (i == (*list)->len)
159 g_ptr_array_add (*list, new_codepage_desc (id, p));
164 if (default_codepage != NULL)
166 display_codepage = get_codepage_index (default_codepage);
167 g_free (default_codepage);
170 fail:
171 fclose (f);
174 /* --------------------------------------------------------------------------------------------- */
176 static char
177 translate_character (GIConv cd, char c)
179 gchar *tmp_buff = NULL;
180 gsize bytes_read, bytes_written = 0;
181 const char *ibuf = &c;
182 char ch = UNKNCHAR;
184 int ibuflen = 1;
186 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
187 if (tmp_buff)
188 ch = tmp_buff[0];
189 g_free (tmp_buff);
190 return ch;
193 /* --------------------------------------------------------------------------------------------- */
194 /*** public functions ****************************************************************************/
195 /* --------------------------------------------------------------------------------------------- */
197 void
198 load_codepages_list (void)
200 char *fname;
202 /* 1: try load /usr/share/mc/mc.charsets */
203 fname = g_build_filename (mc_home_alt, CHARSETS_LIST, (char *) NULL);
204 load_codepages_list_from_file (&codepages, fname);
205 g_free (fname);
207 /* 2: try load /etc/mc/mc.charsets */
208 fname = g_build_filename (mc_home, CHARSETS_LIST, (char *) NULL);
209 load_codepages_list_from_file (&codepages, fname);
210 g_free (fname);
212 if (codepages == NULL)
214 /* files are not found, add defaullt codepage */
215 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
217 codepages = g_ptr_array_new ();
218 g_ptr_array_add (codepages, new_codepage_desc ("ASCII", _("7-bit ASCII")));
222 /* --------------------------------------------------------------------------------------------- */
224 void
225 free_codepages_list (void)
227 g_ptr_array_foreach (codepages, free_codepage_desc, NULL);
228 g_ptr_array_free (codepages, TRUE);
231 /* --------------------------------------------------------------------------------------------- */
233 const char *
234 get_codepage_id (const int n)
236 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
239 /* --------------------------------------------------------------------------------------------- */
242 get_codepage_index (const char *id)
244 size_t i;
245 if (strcmp (id, OTHER_8BIT) == 0)
246 return -1;
247 if (codepages == NULL)
248 return -1;
249 for (i = 0; i < codepages->len; i++)
250 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
251 return i;
252 return -1;
255 /* --------------------------------------------------------------------------------------------- */
256 /** Check if specified encoding can be used in mc.
257 * @param encoding name of encoding
258 * @returns TRUE if encoding has supported by mc, FALSE otherwise
261 gboolean
262 is_supported_encoding (const char *encoding)
264 gboolean result = FALSE;
265 guint t;
267 for (t = 0; t < codepages->len; t++)
269 const char *id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
270 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
273 return result;
276 /* --------------------------------------------------------------------------------------------- */
278 char *
279 init_translation_table (int cpsource, int cpdisplay)
281 int i;
282 GIConv cd;
284 /* Fill inpit <-> display tables */
286 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay)
288 for (i = 0; i <= 255; ++i)
290 conv_displ[i] = i;
291 conv_input[i] = i;
292 cp_source = cp_display;
294 return NULL;
297 for (i = 0; i <= 127; ++i)
299 conv_displ[i] = i;
300 conv_input[i] = i;
302 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
303 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
305 /* display <- inpit table */
307 cd = g_iconv_open (cp_display, cp_source);
308 if (cd == INVALID_CONV)
309 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
311 for (i = 128; i <= 255; ++i)
312 conv_displ[i] = translate_character (cd, i);
314 g_iconv_close (cd);
316 /* inpit <- display table */
318 cd = g_iconv_open (cp_source, cp_display);
319 if (cd == INVALID_CONV)
320 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
322 for (i = 128; i <= 255; ++i)
324 unsigned char ch;
325 ch = translate_character (cd, i);
326 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
329 g_iconv_close (cd);
331 return NULL;
334 /* --------------------------------------------------------------------------------------------- */
336 void
337 convert_to_display (char *str)
339 if (!str)
340 return;
342 while (*str)
344 *str = conv_displ[(unsigned char) *str];
345 str++;
349 /* --------------------------------------------------------------------------------------------- */
351 GString *
352 str_convert_to_display (char *str)
354 return str_nconvert_to_display (str, -1);
358 /* --------------------------------------------------------------------------------------------- */
360 GString *
361 str_nconvert_to_display (char *str, int len)
363 GString *buff;
364 GIConv conv;
366 if (!str)
367 return g_string_new ("");
369 if (cp_display == cp_source)
370 return g_string_new (str);
372 conv = str_crt_conv_from (cp_source);
374 buff = g_string_new ("");
375 str_nconvert (conv, str, len, buff);
376 str_close_conv (conv);
377 return buff;
380 /* --------------------------------------------------------------------------------------------- */
382 void
383 convert_from_input (char *str)
385 if (!str)
386 return;
388 while (*str)
390 *str = conv_input[(unsigned char) *str];
391 str++;
395 /* --------------------------------------------------------------------------------------------- */
397 GString *
398 str_convert_to_input (char *str)
400 return str_nconvert_to_input (str, -1);
403 /* --------------------------------------------------------------------------------------------- */
405 GString *
406 str_nconvert_to_input (char *str, int len)
408 GString *buff;
409 GIConv conv;
411 if (!str)
412 return g_string_new ("");
414 if (cp_display == cp_source)
415 return g_string_new (str);
417 conv = str_crt_conv_to (cp_source);
419 buff = g_string_new ("");
420 str_nconvert (conv, str, len, buff);
421 str_close_conv (conv);
422 return buff;
425 /* --------------------------------------------------------------------------------------------- */
427 unsigned char
428 convert_from_utf_to_current (const char *str)
430 unsigned char buf_ch[6 + 1];
431 unsigned char ch = '.';
432 GIConv conv;
433 const char *cp_to;
435 if (!str)
436 return '.';
438 cp_to = get_codepage_id (source_codepage);
439 conv = str_crt_conv_to (cp_to);
441 if (conv != INVALID_CONV)
443 switch (str_translate_char (conv, str, -1, (char *) buf_ch, sizeof (buf_ch)))
445 case ESTR_SUCCESS:
446 ch = buf_ch[0];
447 break;
448 case ESTR_PROBLEM:
449 case ESTR_FAILURE:
450 ch = '.';
451 break;
453 str_close_conv (conv);
456 return ch;
460 /* --------------------------------------------------------------------------------------------- */
462 unsigned char
463 convert_from_utf_to_current_c (const int input_char, GIConv conv)
465 unsigned char str[6 + 1];
466 unsigned char buf_ch[6 + 1];
467 unsigned char ch = '.';
469 int res = 0;
471 res = g_unichar_to_utf8 (input_char, (char *) str);
472 if (res == 0)
474 return ch;
476 str[res] = '\0';
478 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
480 case ESTR_SUCCESS:
481 ch = buf_ch[0];
482 break;
483 case ESTR_PROBLEM:
484 case ESTR_FAILURE:
485 ch = '.';
486 break;
488 return ch;
491 /* --------------------------------------------------------------------------------------------- */
494 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
496 unsigned char str[2];
497 unsigned char buf_ch[6 + 1];
498 int ch = '.';
499 int res = 0;
501 str[0] = (unsigned char) input_char;
502 str[1] = '\0';
504 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
506 case ESTR_SUCCESS:
507 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
508 if (res < 0)
510 ch = buf_ch[0];
512 else
514 ch = res;
516 break;
517 case ESTR_PROBLEM:
518 case ESTR_FAILURE:
519 ch = '.';
520 break;
522 return ch;
525 /* --------------------------------------------------------------------------------------------- */
528 convert_from_8bit_to_utf_c2 (const char input_char)
530 unsigned char str[2];
531 unsigned char buf_ch[6 + 1];
532 int ch = '.';
533 int res = 0;
534 GIConv conv;
535 const char *cp_from;
537 str[0] = (unsigned char) input_char;
538 str[1] = '\0';
540 cp_from = get_codepage_id (source_codepage);
541 conv = str_crt_conv_to (cp_from);
543 if (conv != INVALID_CONV)
545 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof (buf_ch)))
547 case ESTR_SUCCESS:
548 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
549 if (res < 0)
551 ch = buf_ch[0];
553 else
555 ch = res;
557 break;
558 case ESTR_PROBLEM:
559 case ESTR_FAILURE:
560 ch = '.';
561 break;
563 str_close_conv (conv);
565 return ch;
569 /* --------------------------------------------------------------------------------------------- */
571 #endif /* HAVE_CHARSET */