Merge branch '44_more_functionally_u7z'
[pantumic.git] / src / charsets.c
blob4b8c3bc9bf2cc5199b79081e4afe1b0ff8966125
/* Text conversion from one charset to another.

   Copyright (C) 2001 Walery Studennikov <despair@sama.ru>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
/** \file charsets.c
 *  \brief Source: Text conversion from one charset to another
 */
24 #include <config.h>
26 #ifdef HAVE_CHARSET
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include "global.h"
33 #include "charsets.h"
34 #include "strutil.h" /* utf-8 functions */
35 #include "main.h"
36 #include "util.h" /* concat_dir_and_file() */
/* Number of entries currently stored in the codepages table. */
int n_codepages = 0;

/* Table of known codepages; allocated by load_codepages_list () with one
   extra zeroed element after the last entry. */
struct codepage_desc *codepages;

/* Per-byte translation tables filled in by init_translation_table ():
   conv_displ is applied by convert_to_display (),
   conv_input is applied by convert_from_input (). */
unsigned char conv_displ[256];
unsigned char conv_input[256];

/* Identifiers of the display and source codepages currently selected by
   init_translation_table (). */
const char *cp_display = NULL;
const char *cp_source = NULL;
49 int
50 load_codepages_list (void)
52 int result = -1;
53 FILE *f;
54 char *fname;
55 char buf[BUF_MEDIUM];
56 char *default_codepage = NULL;
58 fname = concat_dir_and_file (mc_home, CHARSETS_INDEX);
59 if (!(f = fopen (fname, "r"))) {
60 fprintf (stderr, _("Warning: file %s not found\n"), fname);
61 g_free (fname);
63 fname = concat_dir_and_file (mc_home_alt, CHARSETS_INDEX);
64 if (!(f = fopen (fname, "r"))) {
65 fprintf (stderr, _("Warning: file %s not found\n"), fname);
66 g_free (fname);
68 /* file is not found, add defaullt codepage */
69 n_codepages = 1;
70 codepages = g_new0 (struct codepage_desc, n_codepages + 1);
71 codepages[0].id = g_strdup ("ASCII");
72 codepages[0].name = g_strdup (_("7-bit ASCII"));
73 return n_codepages;
76 g_free (fname);
78 for (n_codepages = 0; fgets (buf, sizeof (buf), f);)
79 if (buf[0] != '\n' && buf[0] != '\0' && buf[0] != '#')
80 ++n_codepages;
81 rewind (f);
83 codepages = g_new0 (struct codepage_desc, n_codepages + 1);
85 for (n_codepages = 0; fgets (buf, sizeof buf, f);) {
86 /* split string into id and cpname */
87 char *p = buf;
88 size_t buflen = strlen (buf);
90 if (*p == '\n' || *p == '\0' || *p == '#')
91 continue;
93 if (buflen > 0 && buf[buflen - 1] == '\n')
94 buf[buflen - 1] = '\0';
95 while (*p != '\t' && *p != ' ' && *p != '\0')
96 ++p;
97 if (*p == '\0')
98 goto fail;
100 *p++ = '\0';
101 g_strstrip (p);
102 if (*p == '\0')
103 goto fail;
105 if (strcmp (buf, "default") == 0)
106 default_codepage = g_strdup (p);
107 else {
108 codepages[n_codepages].id = g_strdup (buf);
109 codepages[n_codepages].name = g_strdup (p);
110 ++n_codepages;
114 if (default_codepage != NULL) {
115 display_codepage = get_codepage_index (default_codepage);
116 g_free (default_codepage);
119 result = n_codepages;
120 fail:
121 fclose (f);
122 return result;
125 void
126 free_codepages_list (void)
128 if (n_codepages > 0) {
129 int i;
130 for (i = 0; i < n_codepages; i++) {
131 g_free (codepages[i].id);
132 g_free (codepages[i].name);
134 n_codepages = 0;
135 g_free (codepages);
136 codepages = 0;
140 #define OTHER_8BIT "Other_8_bit"
142 const char *
143 get_codepage_id (const int n)
145 return (n < 0) ? OTHER_8BIT : codepages[n].id;
149 get_codepage_index (const char *id)
151 int i;
152 if (strcmp (id, OTHER_8BIT) == 0)
153 return -1;
154 if (codepages == NULL)
155 return -1;
156 for (i = 0; codepages[i].id; ++i)
157 if (strcmp (id, codepages[i].id) == 0)
158 return i;
159 return -1;
162 static char
163 translate_character (GIConv cd, char c)
165 gchar *tmp_buff = NULL;
166 gsize bytes_read, bytes_written = 0;
167 const char *ibuf = &c;
168 char ch = UNKNCHAR;
170 int ibuflen = 1;
172 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
173 if ( tmp_buff )
174 ch = tmp_buff[0];
175 g_free (tmp_buff);
176 return ch;
179 char errbuf[255];
182 * FIXME: This assumes that ASCII is always the first encoding
183 * in mc.charsets
185 #define CP_ASCII 0
187 const char *
188 init_translation_table (int cpsource, int cpdisplay)
190 int i;
191 GIConv cd;
193 /* Fill inpit <-> display tables */
195 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay) {
196 for (i = 0; i <= 255; ++i) {
197 conv_displ[i] = i;
198 conv_input[i] = i;
199 cp_source = cp_display;
201 return NULL;
204 for (i = 0; i <= 127; ++i) {
205 conv_displ[i] = i;
206 conv_input[i] = i;
208 cp_source = (char *) codepages[cpsource].id;
209 cp_display = (char *) codepages[cpdisplay].id;
211 /* display <- inpit table */
213 cd = g_iconv_open (cp_display, cp_source);
214 if (cd == INVALID_CONV) {
215 g_snprintf (errbuf, sizeof (errbuf),
216 _("Cannot translate from %s to %s"), cp_source, cp_display);
217 return errbuf;
220 for (i = 128; i <= 255; ++i)
221 conv_displ[i] = translate_character (cd, i);
223 g_iconv_close (cd);
225 /* inpit <- display table */
227 cd = g_iconv_open (cp_source, cp_display);
228 if (cd == INVALID_CONV) {
229 g_snprintf (errbuf, sizeof (errbuf),
230 _("Cannot translate from %s to %s"), cp_display, cp_source);
231 return errbuf;
234 for (i = 128; i <= 255; ++i) {
235 unsigned char ch;
236 ch = translate_character (cd, i);
237 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
240 g_iconv_close (cd);
242 return NULL;
245 void
246 convert_to_display (char *str)
248 if (!str)
249 return;
251 while (*str) {
252 *str = conv_displ[(unsigned char) *str];
253 str++;
257 GString *
258 str_convert_to_display (char *str)
260 return str_nconvert_to_display (str, -1);
264 GString *
265 str_nconvert_to_display (char *str, int len)
267 GString *buff;
268 GIConv conv;
270 if (!str)
271 return g_string_new("");
273 if (cp_display == cp_source)
274 return g_string_new(str);
276 conv = str_crt_conv_from (cp_source);
278 buff = g_string_new("");
279 str_nconvert (conv, str, len, buff);
280 str_close_conv (conv);
281 return buff;
284 void
285 convert_from_input (char *str)
287 if (!str)
288 return;
290 while (*str) {
291 *str = conv_input[(unsigned char) *str];
292 str++;
296 GString *
297 str_convert_to_input (char *str)
299 return str_nconvert_to_input (str, -1);
302 GString *
303 str_nconvert_to_input (char *str, int len)
305 GString *buff;
306 GIConv conv;
308 if (!str)
309 return g_string_new("");
311 if (cp_display == cp_source)
312 return g_string_new(str);
314 conv = str_crt_conv_to (cp_source);
316 buff = g_string_new("");
317 str_nconvert (conv, str, len, buff);
318 str_close_conv (conv);
319 return buff;
322 unsigned char
323 convert_from_utf_to_current (const char *str)
325 unsigned char buf_ch[6 + 1];
326 unsigned char ch = '.';
327 GIConv conv;
328 const char *cp_to;
330 if (!str)
331 return '.';
333 cp_to = get_codepage_id ( source_codepage );
334 conv = str_crt_conv_to ( cp_to );
336 if (conv != INVALID_CONV) {
337 switch (str_translate_char (conv, str, -1, (char *)buf_ch, sizeof(buf_ch))) {
338 case ESTR_SUCCESS:
339 ch = buf_ch[0];
340 break;
341 case ESTR_PROBLEM:
342 case ESTR_FAILURE:
343 ch = '.';
344 break;
346 str_close_conv (conv);
349 return ch;
353 unsigned char
354 convert_from_utf_to_current_c (const int input_char, GIConv conv)
356 unsigned char str[6 + 1];
357 unsigned char buf_ch[6 + 1];
358 unsigned char ch = '.';
360 int res = 0;
362 res = g_unichar_to_utf8 (input_char, (char *)str);
363 if ( res == 0 ) {
364 return ch;
366 str[res] = '\0';
368 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
369 case ESTR_SUCCESS:
370 ch = buf_ch[0];
371 break;
372 case ESTR_PROBLEM:
373 case ESTR_FAILURE:
374 ch = '.';
375 break;
377 return ch;
381 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
383 unsigned char str[2];
384 unsigned char buf_ch[6 + 1];
385 int ch = '.';
386 int res = 0;
388 str[0] = (unsigned char) input_char;
389 str[1] = '\0';
391 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
392 case ESTR_SUCCESS:
393 res = g_utf8_get_char_validated ((char *)buf_ch, -1);
394 if ( res < 0 ) {
395 ch = buf_ch[0];
396 } else {
397 ch = res;
399 break;
400 case ESTR_PROBLEM:
401 case ESTR_FAILURE:
402 ch = '.';
403 break;
405 return ch;
409 convert_from_8bit_to_utf_c2 (const char input_char)
411 unsigned char str[2];
412 unsigned char buf_ch[6 + 1];
413 int ch = '.';
414 int res = 0;
415 GIConv conv;
416 const char *cp_from;
418 str[0] = (unsigned char) input_char;
419 str[1] = '\0';
421 cp_from = get_codepage_id ( source_codepage );
422 conv = str_crt_conv_to (cp_from);
424 if (conv != INVALID_CONV) {
425 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof(buf_ch))) {
426 case ESTR_SUCCESS:
427 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
428 if ( res < 0 ) {
429 ch = buf_ch[0];
430 } else {
431 ch = res;
433 break;
434 case ESTR_PROBLEM:
435 case ESTR_FAILURE:
436 ch = '.';
437 break;
439 str_close_conv (conv);
441 return ch;
444 #endif /* HAVE_CHARSET */