(edit_set_filename): expand tilde while setting file name.
[pantumic.git] / src / charsets.c
blob009f9b5e7688d0139219f8de09fe304a27f258a6
1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 /** \file charsets.c
21 * \brief Source: Text conversion from one charset to another
24 #include <config.h>
26 #ifdef HAVE_CHARSET
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
36 #include "charsets.h"
37 #include "main.h"
39 GPtrArray *codepages = NULL;
41 unsigned char conv_displ[256];
42 unsigned char conv_input[256];
44 const char *cp_display = NULL;
45 const char *cp_source = NULL;
47 static codepage_desc *
48 new_codepage_desc (const char *id, const char *name)
50 codepage_desc *desc;
52 desc = g_new (codepage_desc, 1);
53 desc->id = g_strdup (id);
54 desc->name = g_strdup (name);
56 return desc;
59 static void
60 free_codepage_desc (gpointer data, gpointer user_data)
62 codepage_desc *desc = (codepage_desc *) data;
63 (void) user_data;
65 g_free (desc->id);
66 g_free (desc->name);
67 g_free (desc);
70 /* returns display codepage */
71 static void
72 load_codepages_list_from_file (GPtrArray **list, const char *fname)
74 FILE *f;
75 guint i;
76 char buf[BUF_MEDIUM];
77 char *default_codepage = NULL;
79 f = fopen (fname, "r");
80 if (f == NULL)
81 return;
83 for (i = 0; fgets (buf, sizeof buf, f) != NULL; )
85 /* split string into id and cpname */
86 char *p = buf;
87 size_t buflen = strlen (buf);
89 if (*p == '\n' || *p == '\0' || *p == '#')
90 continue;
92 if (buflen > 0 && buf[buflen - 1] == '\n')
93 buf[buflen - 1] = '\0';
94 while (*p != '\t' && *p != ' ' && *p != '\0')
95 ++p;
96 if (*p == '\0')
97 goto fail;
99 *p++ = '\0';
100 g_strstrip (p);
101 if (*p == '\0')
102 goto fail;
104 if (strcmp (buf, "default") == 0)
105 default_codepage = g_strdup (p);
106 else
108 const char *id = buf;
110 if (*list == NULL)
112 *list = g_ptr_array_sized_new (16);
113 g_ptr_array_add (*list, new_codepage_desc (id, p));
115 else
117 guint i;
118 codepage_desc *desc;
120 /* whether id is already present in list */
121 /* if yes, overwrite description */
122 for (i = 0; i < (*list)->len; i++)
124 codepage_desc *desc;
126 desc = (codepage_desc *) g_ptr_array_index (*list, i);
128 if (strcmp (id, desc->id) == 0)
130 /* found */
131 g_free (desc->name);
132 desc->name = g_strdup (p);
133 break;
137 /* not found */
138 if (i == (*list)->len)
139 g_ptr_array_add (*list, new_codepage_desc (id, p));
144 if (default_codepage != NULL)
146 display_codepage = get_codepage_index (default_codepage);
147 g_free (default_codepage);
150 fail:
151 fclose (f);
154 void
155 load_codepages_list (void)
157 int result = -1;
158 char *fname;
160 /* 1: try load /usr/share/mc/mc.charsets */
161 fname = g_build_filename (mc_home_alt, CHARSETS_LIST, (char *) NULL);
162 load_codepages_list_from_file (&codepages, fname);
163 g_free (fname);
165 /* 2: try load /etc/mc/mc.charsets */
166 fname = g_build_filename (mc_home, CHARSETS_LIST, (char *) NULL);
167 load_codepages_list_from_file (&codepages, fname);
168 g_free (fname);
170 if (codepages == NULL)
172 /* files are not found, add defaullt codepage */
173 fprintf (stderr, "%s\n", _("Warning: cannot load codepages list"));
175 codepages = g_ptr_array_new ();
176 g_ptr_array_add (codepages, new_codepage_desc ("ASCII", _("7-bit ASCII")));
180 void
181 free_codepages_list (void)
183 g_ptr_array_foreach (codepages, free_codepage_desc, NULL);
184 g_ptr_array_free (codepages, TRUE);
187 #define OTHER_8BIT "Other_8_bit"
189 const char *
190 get_codepage_id (const int n)
192 return (n < 0) ? OTHER_8BIT : ((codepage_desc *) g_ptr_array_index (codepages, n))->id;
196 get_codepage_index (const char *id)
198 int i;
199 if (strcmp (id, OTHER_8BIT) == 0)
200 return -1;
201 if (codepages == NULL)
202 return -1;
203 for (i = 0; i < codepages->len; i++)
204 if (strcmp (id, ((codepage_desc *) g_ptr_array_index (codepages, i))->id) == 0)
205 return i;
206 return -1;
209 /** Check if specified encoding can be used in mc.
210 * @param encoding name of encoding
211 * @returns TRUE if encoding has supported by mc, FALSE otherwise
213 gboolean
214 is_supported_encoding (const char *encoding)
216 gboolean result = FALSE;
217 guint t;
219 for (t = 0; t < codepages->len; t++)
221 const char *id = ((codepage_desc *) g_ptr_array_index (codepages, t))->id;
222 result |= (g_ascii_strncasecmp (encoding, id, strlen (id)) == 0);
225 return result;
228 static char
229 translate_character (GIConv cd, char c)
231 gchar *tmp_buff = NULL;
232 gsize bytes_read, bytes_written = 0;
233 const char *ibuf = &c;
234 char ch = UNKNCHAR;
236 int ibuflen = 1;
238 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
239 if ( tmp_buff )
240 ch = tmp_buff[0];
241 g_free (tmp_buff);
242 return ch;
246 * FIXME: This assumes that ASCII is always the first encoding
247 * in mc.charsets
249 #define CP_ASCII 0
251 char *
252 init_translation_table (int cpsource, int cpdisplay)
254 int i;
255 GIConv cd;
257 /* Fill inpit <-> display tables */
259 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay) {
260 for (i = 0; i <= 255; ++i) {
261 conv_displ[i] = i;
262 conv_input[i] = i;
263 cp_source = cp_display;
265 return NULL;
268 for (i = 0; i <= 127; ++i) {
269 conv_displ[i] = i;
270 conv_input[i] = i;
272 cp_source = ((codepage_desc *) g_ptr_array_index (codepages, cpsource))->id;
273 cp_display = ((codepage_desc *) g_ptr_array_index (codepages, cpdisplay))->id;
275 /* display <- inpit table */
277 cd = g_iconv_open (cp_display, cp_source);
278 if (cd == INVALID_CONV)
279 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
281 for (i = 128; i <= 255; ++i)
282 conv_displ[i] = translate_character (cd, i);
284 g_iconv_close (cd);
286 /* inpit <- display table */
288 cd = g_iconv_open (cp_source, cp_display);
289 if (cd == INVALID_CONV)
290 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
292 for (i = 128; i <= 255; ++i) {
293 unsigned char ch;
294 ch = translate_character (cd, i);
295 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
298 g_iconv_close (cd);
300 return NULL;
303 void
304 convert_to_display (char *str)
306 if (!str)
307 return;
309 while (*str) {
310 *str = conv_displ[(unsigned char) *str];
311 str++;
315 GString *
316 str_convert_to_display (char *str)
318 return str_nconvert_to_display (str, -1);
322 GString *
323 str_nconvert_to_display (char *str, int len)
325 GString *buff;
326 GIConv conv;
328 if (!str)
329 return g_string_new("");
331 if (cp_display == cp_source)
332 return g_string_new(str);
334 conv = str_crt_conv_from (cp_source);
336 buff = g_string_new("");
337 str_nconvert (conv, str, len, buff);
338 str_close_conv (conv);
339 return buff;
342 void
343 convert_from_input (char *str)
345 if (!str)
346 return;
348 while (*str) {
349 *str = conv_input[(unsigned char) *str];
350 str++;
354 GString *
355 str_convert_to_input (char *str)
357 return str_nconvert_to_input (str, -1);
360 GString *
361 str_nconvert_to_input (char *str, int len)
363 GString *buff;
364 GIConv conv;
366 if (!str)
367 return g_string_new("");
369 if (cp_display == cp_source)
370 return g_string_new(str);
372 conv = str_crt_conv_to (cp_source);
374 buff = g_string_new("");
375 str_nconvert (conv, str, len, buff);
376 str_close_conv (conv);
377 return buff;
380 unsigned char
381 convert_from_utf_to_current (const char *str)
383 unsigned char buf_ch[6 + 1];
384 unsigned char ch = '.';
385 GIConv conv;
386 const char *cp_to;
388 if (!str)
389 return '.';
391 cp_to = get_codepage_id ( source_codepage );
392 conv = str_crt_conv_to ( cp_to );
394 if (conv != INVALID_CONV) {
395 switch (str_translate_char (conv, str, -1, (char *)buf_ch, sizeof(buf_ch))) {
396 case ESTR_SUCCESS:
397 ch = buf_ch[0];
398 break;
399 case ESTR_PROBLEM:
400 case ESTR_FAILURE:
401 ch = '.';
402 break;
404 str_close_conv (conv);
407 return ch;
411 unsigned char
412 convert_from_utf_to_current_c (const int input_char, GIConv conv)
414 unsigned char str[6 + 1];
415 unsigned char buf_ch[6 + 1];
416 unsigned char ch = '.';
418 int res = 0;
420 res = g_unichar_to_utf8 (input_char, (char *)str);
421 if ( res == 0 ) {
422 return ch;
424 str[res] = '\0';
426 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
427 case ESTR_SUCCESS:
428 ch = buf_ch[0];
429 break;
430 case ESTR_PROBLEM:
431 case ESTR_FAILURE:
432 ch = '.';
433 break;
435 return ch;
439 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
441 unsigned char str[2];
442 unsigned char buf_ch[6 + 1];
443 int ch = '.';
444 int res = 0;
446 str[0] = (unsigned char) input_char;
447 str[1] = '\0';
449 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
450 case ESTR_SUCCESS:
451 res = g_utf8_get_char_validated ((char *)buf_ch, -1);
452 if ( res < 0 ) {
453 ch = buf_ch[0];
454 } else {
455 ch = res;
457 break;
458 case ESTR_PROBLEM:
459 case ESTR_FAILURE:
460 ch = '.';
461 break;
463 return ch;
467 convert_from_8bit_to_utf_c2 (const char input_char)
469 unsigned char str[2];
470 unsigned char buf_ch[6 + 1];
471 int ch = '.';
472 int res = 0;
473 GIConv conv;
474 const char *cp_from;
476 str[0] = (unsigned char) input_char;
477 str[1] = '\0';
479 cp_from = get_codepage_id ( source_codepage );
480 conv = str_crt_conv_to (cp_from);
482 if (conv != INVALID_CONV) {
483 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof(buf_ch))) {
484 case ESTR_SUCCESS:
485 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
486 if ( res < 0 ) {
487 ch = buf_ch[0];
488 } else {
489 ch = res;
491 break;
492 case ESTR_PROBLEM:
493 case ESTR_FAILURE:
494 ch = '.';
495 break;
497 str_close_conv (conv);
499 return ch;
502 #endif /* HAVE_CHARSET */