Refactored IPV4/IPV6 FTP connection setup code
[midnight-commander.git] / src / charsets.c
blob0958f7fb5f28a310f2d8aa6aa620cb18fe9b89fb
1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 /** \file charsets.c
21 * \brief Source: Text conversion from one charset to another
24 #include <config.h>
26 #ifdef HAVE_CHARSET
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
36 #include "charsets.h"
37 #include "main.h"
/* Number of entries filled in codepages by load_codepages_list(). */
39 int n_codepages = 0;
/* Table of known codepages; load_codepages_list() allocates it zeroed with
 * n_codepages + 1 elements. */
41 struct codepage_desc *codepages;
/* Byte translation tables filled by init_translation_table():
 * conv_displ is applied by convert_to_display(),
 * conv_input is applied by convert_from_input(). */
43 unsigned char conv_displ[256];
44 unsigned char conv_input[256];
/* Ids of the display and source codepages selected by
 * init_translation_table(); the str_nconvert_* helpers compare these two
 * pointers to decide whether any conversion is needed. */
46 const char *cp_display = NULL;
47 const char *cp_source = NULL;
50 int
51 load_codepages_list (void)
53 int result = -1;
54 FILE *f;
55 char *fname;
56 char buf[BUF_MEDIUM];
57 char *default_codepage = NULL;
59 fname = concat_dir_and_file (mc_home, CHARSETS_INDEX);
60 f = fopen (fname, "r");
61 if (f == NULL) {
62 fprintf (stderr, _("Warning: file %s not found\n"), fname);
63 g_free (fname);
65 fname = concat_dir_and_file (mc_home_alt, CHARSETS_INDEX);
66 f = fopen (fname, "r");
67 if (f == NULL) {
68 fprintf (stderr, _("Warning: file %s not found\n"), fname);
69 g_free (fname);
71 /* file is not found, add defaullt codepage */
72 n_codepages = 1;
73 codepages = g_new0 (struct codepage_desc, n_codepages + 1);
74 codepages[0].id = g_strdup ("ASCII");
75 codepages[0].name = g_strdup (_("7-bit ASCII"));
76 return n_codepages;
79 g_free (fname);
81 for (n_codepages = 0; fgets (buf, sizeof (buf), f);)
82 if (buf[0] != '\n' && buf[0] != '\0' && buf[0] != '#')
83 ++n_codepages;
84 rewind (f);
86 codepages = g_new0 (struct codepage_desc, n_codepages + 1);
88 for (n_codepages = 0; fgets (buf, sizeof buf, f);) {
89 /* split string into id and cpname */
90 char *p = buf;
91 size_t buflen = strlen (buf);
93 if (*p == '\n' || *p == '\0' || *p == '#')
94 continue;
96 if (buflen > 0 && buf[buflen - 1] == '\n')
97 buf[buflen - 1] = '\0';
98 while (*p != '\t' && *p != ' ' && *p != '\0')
99 ++p;
100 if (*p == '\0')
101 goto fail;
103 *p++ = '\0';
104 g_strstrip (p);
105 if (*p == '\0')
106 goto fail;
108 if (strcmp (buf, "default") == 0)
109 default_codepage = g_strdup (p);
110 else {
111 codepages[n_codepages].id = g_strdup (buf);
112 codepages[n_codepages].name = g_strdup (p);
113 ++n_codepages;
117 if (default_codepage != NULL) {
118 display_codepage = get_codepage_index (default_codepage);
119 g_free (default_codepage);
122 result = n_codepages;
123 fail:
124 fclose (f);
125 return result;
128 void
129 free_codepages_list (void)
131 if (n_codepages > 0) {
132 int i;
133 for (i = 0; i < n_codepages; i++) {
134 g_free (codepages[i].id);
135 g_free (codepages[i].name);
137 n_codepages = 0;
138 g_free (codepages);
139 codepages = 0;
143 #define OTHER_8BIT "Other_8_bit"
145 const char *
146 get_codepage_id (const int n)
148 return (n < 0) ? OTHER_8BIT : codepages[n].id;
152 get_codepage_index (const char *id)
154 int i;
155 if (strcmp (id, OTHER_8BIT) == 0)
156 return -1;
157 if (codepages == NULL)
158 return -1;
159 for (i = 0; i < n_codepages; i++)
160 if (strcmp (id, codepages[i].id) == 0)
161 return i;
162 return -1;
165 static char
166 translate_character (GIConv cd, char c)
168 gchar *tmp_buff = NULL;
169 gsize bytes_read, bytes_written = 0;
170 const char *ibuf = &c;
171 char ch = UNKNCHAR;
173 int ibuflen = 1;
175 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
176 if ( tmp_buff )
177 ch = tmp_buff[0];
178 g_free (tmp_buff);
179 return ch;
183 * FIXME: This assumes that ASCII is always the first encoding
184 * in mc.charsets
186 #define CP_ASCII 0
188 char *
189 init_translation_table (int cpsource, int cpdisplay)
191 int i;
192 GIConv cd;
194 /* Fill inpit <-> display tables */
196 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay) {
197 for (i = 0; i <= 255; ++i) {
198 conv_displ[i] = i;
199 conv_input[i] = i;
200 cp_source = cp_display;
202 return NULL;
205 for (i = 0; i <= 127; ++i) {
206 conv_displ[i] = i;
207 conv_input[i] = i;
209 cp_source = (char *) codepages[cpsource].id;
210 cp_display = (char *) codepages[cpdisplay].id;
212 /* display <- inpit table */
214 cd = g_iconv_open (cp_display, cp_source);
215 if (cd == INVALID_CONV)
216 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source, cp_display);
218 for (i = 128; i <= 255; ++i)
219 conv_displ[i] = translate_character (cd, i);
221 g_iconv_close (cd);
223 /* inpit <- display table */
225 cd = g_iconv_open (cp_source, cp_display);
226 if (cd == INVALID_CONV)
227 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display, cp_source);
229 for (i = 128; i <= 255; ++i) {
230 unsigned char ch;
231 ch = translate_character (cd, i);
232 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
235 g_iconv_close (cd);
237 return NULL;
240 void
241 convert_to_display (char *str)
243 if (!str)
244 return;
246 while (*str) {
247 *str = conv_displ[(unsigned char) *str];
248 str++;
252 GString *
253 str_convert_to_display (char *str)
255 return str_nconvert_to_display (str, -1);
259 GString *
260 str_nconvert_to_display (char *str, int len)
262 GString *buff;
263 GIConv conv;
265 if (!str)
266 return g_string_new("");
268 if (cp_display == cp_source)
269 return g_string_new(str);
271 conv = str_crt_conv_from (cp_source);
273 buff = g_string_new("");
274 str_nconvert (conv, str, len, buff);
275 str_close_conv (conv);
276 return buff;
279 void
280 convert_from_input (char *str)
282 if (!str)
283 return;
285 while (*str) {
286 *str = conv_input[(unsigned char) *str];
287 str++;
291 GString *
292 str_convert_to_input (char *str)
294 return str_nconvert_to_input (str, -1);
297 GString *
298 str_nconvert_to_input (char *str, int len)
300 GString *buff;
301 GIConv conv;
303 if (!str)
304 return g_string_new("");
306 if (cp_display == cp_source)
307 return g_string_new(str);
309 conv = str_crt_conv_to (cp_source);
311 buff = g_string_new("");
312 str_nconvert (conv, str, len, buff);
313 str_close_conv (conv);
314 return buff;
317 unsigned char
318 convert_from_utf_to_current (const char *str)
320 unsigned char buf_ch[6 + 1];
321 unsigned char ch = '.';
322 GIConv conv;
323 const char *cp_to;
325 if (!str)
326 return '.';
328 cp_to = get_codepage_id ( source_codepage );
329 conv = str_crt_conv_to ( cp_to );
331 if (conv != INVALID_CONV) {
332 switch (str_translate_char (conv, str, -1, (char *)buf_ch, sizeof(buf_ch))) {
333 case ESTR_SUCCESS:
334 ch = buf_ch[0];
335 break;
336 case ESTR_PROBLEM:
337 case ESTR_FAILURE:
338 ch = '.';
339 break;
341 str_close_conv (conv);
344 return ch;
348 unsigned char
349 convert_from_utf_to_current_c (const int input_char, GIConv conv)
351 unsigned char str[6 + 1];
352 unsigned char buf_ch[6 + 1];
353 unsigned char ch = '.';
355 int res = 0;
357 res = g_unichar_to_utf8 (input_char, (char *)str);
358 if ( res == 0 ) {
359 return ch;
361 str[res] = '\0';
363 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
364 case ESTR_SUCCESS:
365 ch = buf_ch[0];
366 break;
367 case ESTR_PROBLEM:
368 case ESTR_FAILURE:
369 ch = '.';
370 break;
372 return ch;
376 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
378 unsigned char str[2];
379 unsigned char buf_ch[6 + 1];
380 int ch = '.';
381 int res = 0;
383 str[0] = (unsigned char) input_char;
384 str[1] = '\0';
386 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
387 case ESTR_SUCCESS:
388 res = g_utf8_get_char_validated ((char *)buf_ch, -1);
389 if ( res < 0 ) {
390 ch = buf_ch[0];
391 } else {
392 ch = res;
394 break;
395 case ESTR_PROBLEM:
396 case ESTR_FAILURE:
397 ch = '.';
398 break;
400 return ch;
404 convert_from_8bit_to_utf_c2 (const char input_char)
406 unsigned char str[2];
407 unsigned char buf_ch[6 + 1];
408 int ch = '.';
409 int res = 0;
410 GIConv conv;
411 const char *cp_from;
413 str[0] = (unsigned char) input_char;
414 str[1] = '\0';
416 cp_from = get_codepage_id ( source_codepage );
417 conv = str_crt_conv_to (cp_from);
419 if (conv != INVALID_CONV) {
420 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof(buf_ch))) {
421 case ESTR_SUCCESS:
422 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
423 if ( res < 0 ) {
424 ch = buf_ch[0];
425 } else {
426 ch = res;
428 break;
429 case ESTR_PROBLEM:
430 case ESTR_FAILURE:
431 ch = '.';
432 break;
434 str_close_conv (conv);
436 return ch;
439 #endif /* HAVE_CHARSET */