fix: input in 8-bit locales
[midnight-commander.git] / src / charsets.c
blob50b3e579b9064b5c316041332739144154d89ee9
1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 #include <config.h>
22 #ifdef HAVE_CHARSET
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #include <iconv.h>
30 #include "global.h"
31 #include "charsets.h"
32 #include "strutil.h" /* utf-8 functions */
33 #include "main.h"
/* Number of entries stored in the codepages table; set by
 * load_codepages_list () and reset by free_codepages_list (). */
35 int n_codepages = 0;
/* Table of known codepages, allocated by load_codepages_list () with one
 * extra zero-filled slot after the last entry (get_codepage_index () stops
 * at the first entry whose id is NULL). */
37 struct codepage_desc *codepages;
/* Byte-to-byte translation table applied by convert_to_display (). */
39 unsigned char conv_displ[256];
/* Byte-to-byte translation table applied by convert_from_input (). */
40 unsigned char conv_input[256];
/* Codepage ids remembered by init_translation_table ().
 * NOTE(review): that function stores the *source* codepage id in cp_display
 * and the *display* codepage id in cp_source; the str_convert_* helpers
 * rely on that assignment. */
42 static char *cp_display = NULL;
43 static char *cp_source = NULL;
46 int
47 load_codepages_list (void)
49 int result = -1;
50 FILE *f;
51 char *fname;
52 char buf[256];
53 extern char *mc_home;
54 extern int display_codepage;
55 char *default_codepage = NULL;
57 fname = concat_dir_and_file (mc_home, CHARSETS_INDEX);
58 if (!(f = fopen (fname, "r"))) {
59 fprintf (stderr, _("Warning: file %s not found\n"), fname);
60 g_free (fname);
61 return -1;
63 g_free (fname);
65 for (n_codepages = 0; fgets (buf, sizeof (buf), f);)
66 if (buf[0] != '\n' && buf[0] != '\0' && buf[0] != '#')
67 ++n_codepages;
68 rewind (f);
70 codepages = g_new0 (struct codepage_desc, n_codepages + 1);
72 for (n_codepages = 0; fgets (buf, sizeof buf, f);) {
73 /* split string into id and cpname */
74 char *p = buf;
75 int buflen = strlen (buf);
77 if (*p == '\n' || *p == '\0' || *p == '#')
78 continue;
80 if (buflen > 0 && buf[buflen - 1] == '\n')
81 buf[buflen - 1] = '\0';
82 while (*p != '\t' && *p != ' ' && *p != '\0')
83 ++p;
84 if (*p == '\0')
85 goto fail;
87 *p++ = 0;
89 while (*p == '\t' || *p == ' ')
90 ++p;
91 if (*p == '\0')
92 goto fail;
94 if (strcmp (buf, "default") == 0) {
95 default_codepage = g_strdup (p);
96 continue;
99 codepages[n_codepages].id = g_strdup (buf);
100 codepages[n_codepages].name = g_strdup (p);
101 ++n_codepages;
104 if (default_codepage) {
105 display_codepage = get_codepage_index (default_codepage);
106 g_free (default_codepage);
109 result = n_codepages;
110 fail:
111 fclose (f);
112 return result;
115 void
116 free_codepages_list (void)
118 if (n_codepages > 0) {
119 int i;
120 for (i = 0; i < n_codepages; i++) {
121 g_free (codepages[i].id);
122 g_free (codepages[i].name);
124 n_codepages = 0;
125 g_free (codepages);
126 codepages = 0;
130 #define OTHER_8BIT "Other_8_bit"
132 const char *
133 get_codepage_id (int n)
135 return (n < 0) ? OTHER_8BIT : codepages[n].id;
139 get_codepage_index (const char *id)
141 int i;
142 if (strcmp (id, OTHER_8BIT) == 0)
143 return -1;
144 for (i = 0; codepages[i].id; ++i)
145 if (strcmp (id, codepages[i].id) == 0)
146 return i;
147 return -1;
150 static char
151 translate_character (iconv_t cd, char c)
153 char outbuf[4], *obuf;
154 size_t ibuflen, obuflen, count;
156 ICONV_CONST char *ibuf = &c;
157 obuf = outbuf;
158 ibuflen = 1;
159 obuflen = 4;
161 count = iconv (cd, &ibuf, &ibuflen, &obuf, &obuflen);
162 if (count != ((size_t) -1) && ibuflen == 0)
163 return outbuf[0];
165 return UNKNCHAR;
168 char errbuf[255];
171 * FIXME: This assumes that ASCII is always the first encoding
172 * in mc.charsets
174 #define CP_ASCII 0
176 const char *
177 init_translation_table (int cpsource, int cpdisplay)
179 int i;
180 iconv_t cd;
181 const char *cpsour, *cpdisp;
183 /* Fill inpit <-> display tables */
185 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay) {
186 for (i = 0; i <= 255; ++i) {
187 conv_displ[i] = i;
188 conv_input[i] = i;
190 return NULL;
193 for (i = 0; i <= 127; ++i) {
194 conv_displ[i] = i;
195 conv_input[i] = i;
198 cp_display = cpsour = codepages[cpsource].id;
199 cp_source = cpdisp = codepages[cpdisplay].id;
201 /* display <- inpit table */
203 cd = iconv_open (cpdisp, cpsour);
204 if (cd == (iconv_t) - 1) {
205 g_snprintf (errbuf, sizeof (errbuf),
206 _("Cannot translate from %s to %s"), cpsour, cpdisp);
207 return errbuf;
210 for (i = 128; i <= 255; ++i)
211 conv_displ[i] = translate_character (cd, i);
213 iconv_close (cd);
215 /* inpit <- display table */
217 cd = iconv_open (cpsour, cpdisp);
218 if (cd == (iconv_t) - 1) {
219 g_snprintf (errbuf, sizeof (errbuf),
220 _("Cannot translate from %s to %s"), cpdisp, cpsour);
221 return errbuf;
224 for (i = 128; i <= 255; ++i) {
225 unsigned char ch;
226 ch = translate_character (cd, i);
227 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
230 iconv_close (cd);
232 return NULL;
235 void
236 convert_to_display (char *str)
238 if (!str)
239 return;
241 while (*str) {
242 *str = conv_displ[(unsigned char) *str];
243 str++;
247 GString *
248 str_convert_to_display (char *str)
250 GString *buff;
251 GIConv conv;
253 if (!str)
254 return NULL;
256 if (cp_display == cp_source)
257 return g_string_new(str);
259 conv = str_crt_conv_from (cp_display);
261 buff = g_string_new("");
262 str_convert (conv, str, buff);
263 return buff;
266 void
267 convert_from_input (char *str)
269 if (!str)
270 return;
272 while (*str) {
273 *str = conv_input[(unsigned char) *str];
274 str++;
278 GString *
279 str_convert_from_input (char *str)
281 GString *buff;
282 GIConv conv;
284 if (!str)
285 return NULL;
287 if (cp_display == cp_source)
288 return g_string_new(str);
290 conv = str_crt_conv_to (cp_display);
292 buff = g_string_new("");
293 str_convert (conv, str, buff);
294 return buff;
297 unsigned char
298 convert_from_utf_to_current (const char *str)
301 if (!str)
302 return '.';
304 unsigned char buf_ch[6 + 1];
305 unsigned char ch = '.';
306 char *cp_to = NULL;
307 GIConv conv;
309 cp_to = get_codepage_id ( source_codepage );
310 conv = str_crt_conv_to ( cp_to );
312 if (conv != INVALID_CONV) {
313 switch (str_translate_char (conv, str, -1, buf_ch, sizeof(buf_ch))) {
314 case 0:
315 ch = buf_ch[0];
316 break;
317 case 1:
318 case 2:
319 ch = '.';
320 break;
322 str_close_conv (conv);
325 return ch;
329 unsigned char
330 convert_from_utf_to_current_c (const int input_char)
332 unsigned char str[6 + 1];
333 unsigned char buf_ch[6 + 1];
334 unsigned char ch = '.';
336 char *cp_from = NULL;
337 GIConv conv;
338 GString *translated_data;
339 int res = 0;
341 res = g_unichar_to_utf8 (input_char, str);
342 if ( res == 0 ) {
343 return ch;
345 str[6] = '\0';
347 cp_from = get_codepage_id ( source_codepage );
348 conv = str_crt_conv_from (cp_from);
350 if (conv != INVALID_CONV) {
351 switch (str_translate_char (conv, str, sizeof(str), buf_ch, sizeof(buf_ch))) {
352 case 0:
353 ch = buf_ch[0];
354 break;
355 case 1:
356 ch = '.';
357 break;
358 case 2:
359 ch = '.';
361 str_close_conv (conv);
363 return ch;
367 #endif /* HAVE_CHARSET */