1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * \brief Source: Text conversion from one charset to another
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
/* Table of codepage descriptors; allocated with g_new0 in
   load_codepages_list(). */
41 struct codepage_desc
*codepages
;
/* 256-entry byte table read by convert_to_display(); entries 128..255 are
   written by init_translation_table(). */
43 unsigned char conv_displ
[256];
/* 256-entry byte table read by convert_from_input(); entries 128..255 are
   written by init_translation_table(). */
44 unsigned char conv_input
[256];
/* Id string of the display codepage; assigned in init_translation_table(). */
46 const char *cp_display
= NULL
;
/* Id string of the source codepage; assigned in init_translation_table() and
   compared with cp_display by pointer in str_nconvert_to_display() and
   str_nconvert_to_input(). */
47 const char *cp_source
= NULL
;
/*
 * Load the list of codepages from the CHARSETS_INDEX file.
 *
 * The file name is built first under mc_home and then under mc_home_alt;
 * a warning naming the file is printed to stderr for a location where it
 * is reported as not found.  The fallback path installs a single
 * "ASCII" / "7-bit ASCII" entry.  Otherwise the file is read twice with
 * fgets(): one pass to size the codepages array, one pass to fill it.
 *
 * The last visible statement stores n_codepages in result.
 *
 * NOTE(review): the local declarations, the branch structure around the
 * fopen() calls, the counter increments and the return statements are not
 * visible in this text; confirm them against the complete source.
 */
51 load_codepages_list (void)
57 char *default_codepage
= NULL
;
/* First candidate location: mc_home. */
59 fname
= concat_dir_and_file (mc_home
, CHARSETS_INDEX
);
60 f
= fopen (fname
, "r");
62 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
/* Second candidate location: mc_home_alt. */
65 fname
= concat_dir_and_file (mc_home_alt
, CHARSETS_INDEX
);
66 f
= fopen (fname
, "r");
68 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
71 /* file is not found, add default codepage */
73 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
74 codepages
[0].id
= g_strdup ("ASCII");
75 codepages
[0].name
= g_strdup (_("7-bit ASCII"));
/* First pass: n_codepages restarts at 0; the test below selects lines that
   are neither empty nor start with '#'. */
81 for (n_codepages
= 0; fgets (buf
, sizeof (buf
), f
);)
82 if (buf
[0] != '\n' && buf
[0] != '\0' && buf
[0] != '#')
/* Allocate one descriptor more than n_codepages (g_new0 zero-fills). */
86 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
/* Second pass: n_codepages restarts at 0 and each line is parsed. */
88 for (n_codepages
= 0; fgets (buf
, sizeof buf
, f
);) {
89 /* split string into id and cpname */
91 size_t buflen
= strlen (buf
);
/* Test for a line beginning with newline, NUL or '#'. */
93 if (*p
== '\n' || *p
== '\0' || *p
== '#')
/* Drop the trailing newline that fgets() keeps. */
96 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
97 buf
[buflen
- 1] = '\0';
/* Loop while p is not at a tab, a space or the end of the string. */
98 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
/* The id "default" is special: the text at p is duplicated into
   default_codepage. */
108 if (strcmp (buf
, "default") == 0)
109 default_codepage
= g_strdup (p
);
/* Store duplicated copies of the id (buf) and the name (p) in slot
   n_codepages. */
111 codepages
[n_codepages
].id
= g_strdup (buf
);
112 codepages
[n_codepages
].name
= g_strdup (p
);
/* If a default was recorded, resolve it to an index, store that in
   display_codepage and release the temporary copy. */
117 if (default_codepage
!= NULL
) {
118 display_codepage
= get_codepage_index (default_codepage
);
119 g_free (default_codepage
);
122 result
= n_codepages
;
/*
 * Release the strings held by the codepage table.
 *
 * When n_codepages is positive, g_free() is called on the id and name of
 * each of the first n_codepages entries.
 *
 * NOTE(review): whether the codepages array itself is freed or any global is
 * reset is not visible in this text.
 */
129 free_codepages_list (void)
131 if (n_codepages
> 0) {
133 for (i
= 0; i
< n_codepages
; i
++) {
134 g_free (codepages
[i
].id
);
135 g_free (codepages
[i
].name
);
/* Id string returned by get_codepage_id() for a negative index and tested
   for first in get_codepage_index(). */
143 #define OTHER_8BIT "Other_8_bit"
/*
 * Map a codepage index to its id string.
 *
 * n: index into codepages; a negative value yields OTHER_8BIT.
 * Returns OTHER_8BIT or codepages[n].id.
 *
 * NOTE(review): no check of n against n_codepages, and no check that
 * codepages is non-NULL, is visible here.
 */
146 get_codepage_id (const int n
)
148 return (n
< 0) ? OTHER_8BIT
: codepages
[n
].id
;
/*
 * Look up a codepage by its id string.
 *
 * id: id to search for; it is passed to strcmp(), so it must not be NULL.
 *
 * Three cases are tested in order: id equals OTHER_8BIT, the codepages table
 * is NULL, and id equals the id of one of the first n_codepages entries.
 *
 * NOTE(review): the value returned in each case is not visible in this text.
 */
152 get_codepage_index (const char *id
)
155 if (strcmp (id
, OTHER_8BIT
) == 0)
157 if (codepages
== NULL
)
159 for (i
= 0; i
< n_codepages
; i
++)
160 if (strcmp (id
, codepages
[i
].id
) == 0)
/*
 * Convert the single byte c through the iconv descriptor cd.
 *
 * cd: conversion descriptor passed to g_convert_with_iconv().
 * c:  input byte; its address is used as the input buffer.
 *
 * NOTE(review): how the converted buffer is reduced to the return value, and
 * where tmp_buff is released, is not visible in this text.
 */
166 translate_character (GIConv cd
, char c
)
168 gchar
*tmp_buff
= NULL
;
/* Only bytes_written carries the "= 0" initialiser; bytes_read is left
   uninitialised until g_convert_with_iconv() writes it. */
169 gsize bytes_read
, bytes_written
= 0;
170 const char *ibuf
= &c
;
/* The final NULL argument means no GError is collected. */
175 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
183 * FIXME: This assumes that ASCII is always the first encoding
/*
 * Set cp_source / cp_display and fill the conv_displ and conv_input tables.
 *
 * cpsource:  index of the source codepage in codepages.
 * cpdisplay: index of the display codepage in codepages.
 *
 * When either index is negative or both are equal, a loop over 0..255 runs
 * and cp_source is set to cp_display.  Otherwise a loop over 0..127 runs,
 * both ids are taken from codepages, and entries 128..255 of each table are
 * produced with translate_character() through a g_iconv_open() descriptor,
 * one descriptor per direction.
 *
 * Returns a newly allocated message from g_strdup_printf() when
 * g_iconv_open() yields INVALID_CONV.
 *
 * NOTE(review): the bodies of the 0..255 and 0..127 loops, the closing of
 * the iconv descriptors and the success return value are not visible here.
 */
189 init_translation_table (int cpsource
, int cpdisplay
)
194 /* Fill input <-> display tables */
196 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
) {
197 for (i
= 0; i
<= 255; ++i
) {
200 cp_source
= cp_display
;
205 for (i
= 0; i
<= 127; ++i
) {
209 cp_source
= (char *) codepages
[cpsource
].id
;
210 cp_display
= (char *) codepages
[cpdisplay
].id
;
212 /* display <- input table */
/* g_iconv_open() takes the target charset first, then the source charset. */
214 cd
= g_iconv_open (cp_display
, cp_source
);
215 if (cd
== INVALID_CONV
)
216 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source
, cp_display
);
218 for (i
= 128; i
<= 255; ++i
)
219 conv_displ
[i
] = translate_character (cd
, i
);
223 /* input <- display table */
225 cd
= g_iconv_open (cp_source
, cp_display
);
226 if (cd
== INVALID_CONV
)
227 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display
, cp_source
);
229 for (i
= 128; i
<= 255; ++i
) {
231 ch
= translate_character (cd
, i
);
/* A byte that translates to UNKNCHAR is mapped to itself. */
232 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
/*
 * Translate bytes of str in place through the conv_displ table.
 *
 * str: buffer that is overwritten; each byte is used as an unsigned index.
 *
 * NOTE(review): the loop around the assignment is not visible in this text.
 */
241 convert_to_display (char *str
)
247 *str
= conv_displ
[(unsigned char) *str
];
/*
 * Convenience wrapper: forwards str to str_nconvert_to_display() with a
 * length argument of -1 and returns its result.
 */
253 str_convert_to_display (char *str
)
255 return str_nconvert_to_display (str
, -1);
/*
 * Convert str into a new GString using a converter created by
 * str_crt_conv_from (cp_source).
 *
 * str: input text.
 * len: length argument handed to str_nconvert().
 *
 * An empty GString is returned on an early exit whose condition is not
 * visible here (presumably a NULL str; TODO confirm).  When cp_display and
 * cp_source are the same pointer, a plain copy of str is returned without
 * conversion.
 *
 * NOTE(review): the return value of str_nconvert() is not checked in the
 * visible lines, and the final return statement is not shown.
 */
260 str_nconvert_to_display (char *str
, int len
)
266 return g_string_new("");
/* Pointer comparison, not strcmp(). */
268 if (cp_display
== cp_source
)
269 return g_string_new(str
);
271 conv
= str_crt_conv_from (cp_source
);
273 buff
= g_string_new("");
274 str_nconvert (conv
, str
, len
, buff
);
275 str_close_conv (conv
);
/*
 * Translate bytes of str in place through the conv_input table.
 *
 * str: buffer that is overwritten; each byte is used as an unsigned index.
 *
 * NOTE(review): the loop around the assignment is not visible in this text.
 */
280 convert_from_input (char *str
)
286 *str
= conv_input
[(unsigned char) *str
];
/*
 * Convenience wrapper: forwards str to str_nconvert_to_input() with a
 * length argument of -1 and returns its result.
 */
292 str_convert_to_input (char *str
)
294 return str_nconvert_to_input (str
, -1);
/*
 * Convert str into a new GString using a converter created by
 * str_crt_conv_to (cp_source).
 *
 * str: input text.
 * len: length argument handed to str_nconvert().
 *
 * An empty GString is returned on an early exit whose condition is not
 * visible here (presumably a NULL str; TODO confirm).  When cp_display and
 * cp_source are the same pointer, a plain copy of str is returned without
 * conversion.
 *
 * NOTE(review): the return value of str_nconvert() is not checked in the
 * visible lines, and the final return statement is not shown.
 */
298 str_nconvert_to_input (char *str
, int len
)
304 return g_string_new("");
/* Pointer comparison, not strcmp(). */
306 if (cp_display
== cp_source
)
307 return g_string_new(str
);
309 conv
= str_crt_conv_to (cp_source
);
311 buff
= g_string_new("");
312 str_nconvert (conv
, str
, len
, buff
);
313 str_close_conv (conv
);
/*
 * Translate the character at str with a converter created by
 * str_crt_conv_to() for the id of source_codepage.
 *
 * str: input text handed to str_translate_char() with length -1.
 *
 * ch starts as '.'; the translation is only attempted when the converter is
 * not INVALID_CONV, and the converter is closed afterwards.
 *
 * NOTE(review): the switch cases that consume buf_ch and the return
 * statement are not visible in this text.
 */
318 convert_from_utf_to_current (const char *str
)
/* 7-byte output buffer for str_translate_char(). */
320 unsigned char buf_ch
[6 + 1];
321 unsigned char ch
= '.';
328 cp_to
= get_codepage_id ( source_codepage
);
329 conv
= str_crt_conv_to ( cp_to
);
331 if (conv
!= INVALID_CONV
) {
332 switch (str_translate_char (conv
, str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
341 str_close_conv (conv
);
/*
 * Translate the Unicode code point input_char through the caller-supplied
 * converter conv.
 *
 * input_char: code point, first encoded into str with g_unichar_to_utf8().
 * conv:       converter handed to str_translate_char(); it is not closed in
 *             the visible lines.
 *
 * ch starts as '.'.
 *
 * NOTE(review): g_unichar_to_utf8() does not NUL-terminate its output; the
 * statement that terminates str, the switch cases and the return statement
 * are not visible in this text.
 */
349 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
351 unsigned char str
[6 + 1];
352 unsigned char buf_ch
[6 + 1];
353 unsigned char ch
= '.';
/* res receives the return value of g_unichar_to_utf8(). */
357 res
= g_unichar_to_utf8 (input_char
, (char *)str
);
363 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
/*
 * Translate the single byte input_char through the caller-supplied converter
 * conv and decode the result as a UTF-8 character.
 *
 * input_char: byte placed in str[0].
 * conv:       converter handed to str_translate_char(); it is not closed in
 *             the visible lines.
 *
 * res is obtained from g_utf8_get_char_validated() on buf_ch.
 *
 * NOTE(review): the assignment that terminates str, the switch case labels
 * and the return statement are not visible in this text.
 */
376 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
378 unsigned char str
[2];
379 unsigned char buf_ch
[6 + 1];
383 str
[0] = (unsigned char) input_char
;
386 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
388 res
= g_utf8_get_char_validated ((char *)buf_ch
, -1);
/*
 * Translate the single byte input_char with a converter created here for the
 * id of source_codepage, and decode the result as a UTF-8 character.
 *
 * input_char: byte placed in str[0].
 *
 * The translation is only attempted when the converter is not INVALID_CONV;
 * the converter is closed afterwards.  res is obtained from
 * g_utf8_get_char_validated() on buf_ch.
 *
 * NOTE(review): the converter comes from str_crt_conv_to(), the same
 * constructor used by convert_from_utf_to_current(), although this function
 * converts in the opposite direction; confirm that str_crt_conv_from() is
 * not the intended call.  The assignment that terminates str, the switch
 * case labels and the return statement are not visible in this text.
 */
404 convert_from_8bit_to_utf_c2 (const char input_char
)
406 unsigned char str
[2];
407 unsigned char buf_ch
[6 + 1];
413 str
[0] = (unsigned char) input_char
;
416 cp_from
= get_codepage_id ( source_codepage
);
417 conv
= str_crt_conv_to (cp_from
);
419 if (conv
!= INVALID_CONV
) {
420 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof(buf_ch
))) {
422 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
434 str_close_conv (conv
);
439 #endif /* HAVE_CHARSET */