1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * \brief Source: Text conversion from one charset to another
34 #include "strutil.h" /* utf-8 functions */
36 #include "util.h" /* concat_dir_and_file() */
/* Table of known codepages; load_codepages_list() allocates it with
 * g_new0 and fills the id and name fields of each entry. */
40 struct codepage_desc
*codepages
;
/* 256-entry byte maps indexed by an unsigned char value.
 * convert_to_display() maps bytes through conv_displ and
 * convert_from_input() maps bytes through conv_input.
 * init_translation_table() fills entries 128..255 by calling
 * translate_character(). */
42 unsigned char conv_displ
[256];
43 unsigned char conv_input
[256];
/* Charset names used for display and for the source text.
 * init_translation_table() points them at codepages[].id strings. */
45 const char *cp_display
= NULL
;
46 const char *cp_source
= NULL
;
/* Read the CHARSETS_INDEX file and build the codepages table.
 *
 * The file is looked up under mc_home first and then under mc_home_alt;
 * a warning is printed to stderr for each location that cannot be opened.
 * When neither opens, a table holding the single entry ASCII is created.
 * Otherwise the file is scanned twice with fgets: once before the table
 * is allocated and once to store an id and a name per line.
 * A line whose first field equals default supplies the name that is
 * resolved into display_codepage at the end.
 * result receives n_codepages (presumably the return value - the return
 * statement is not visible here, TODO confirm). */
50 load_codepages_list (void)
56 char *default_codepage
= NULL
;
/* First candidate location: mc_home. */
58 fname
= concat_dir_and_file (mc_home
, CHARSETS_INDEX
);
59 if (!(f
= fopen (fname
, "r"))) {
60 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
/* Second candidate location: mc_home_alt. */
63 fname
= concat_dir_and_file (mc_home_alt
, CHARSETS_INDEX
);
64 if (!(f
= fopen (fname
, "r"))) {
65 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
68 /* file is not found, add default codepage */
70 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
71 codepages
[0].id
= g_strdup ("ASCII");
72 codepages
[0].name
= g_strdup (_("7-bit ASCII"));
/* First pass over the file: the test below selects lines that are neither
 * empty nor start with the comment marker (presumably to count entries
 * into n_codepages - the loop body is not visible here, confirm). */
78 for (n_codepages
= 0; fgets (buf
, sizeof (buf
), f
);)
79 if (buf
[0] != '\n' && buf
[0] != '\0' && buf
[0] != '#')
/* One slot more than n_codepages is allocated; g_new0 zero-fills, and
 * get_codepage_index() stops scanning at the first entry whose id is NULL. */
83 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
/* Second pass: parse each line and store it. */
85 for (n_codepages
= 0; fgets (buf
, sizeof buf
, f
);) {
86 /* split string into id and cpname */
88 size_t buflen
= strlen (buf
);
/* Test for empty lines and comment lines. */
90 if (*p
== '\n' || *p
== '\0' || *p
== '#')
/* Drop the trailing newline kept by fgets. */
93 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
94 buf
[buflen
- 1] = '\0';
/* Walk p up to the first tab, space or end of string, i.e. the end of
 * the id field. */
95 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
/* NOTE(review): a second default line would overwrite default_codepage
 * without releasing the earlier copy unless that is handled in code not
 * visible here - confirm. */
105 if (strcmp (buf
, "default") == 0)
106 default_codepage
= g_strdup (p
);
/* The table owns private copies of both strings; free_codepages_list()
 * releases them. */
108 codepages
[n_codepages
].id
= g_strdup (buf
);
109 codepages
[n_codepages
].name
= g_strdup (p
);
/* Resolve the default codepage name to an index, then release the copy. */
114 if (default_codepage
!= NULL
) {
115 display_codepage
= get_codepage_index (default_codepage
);
116 g_free (default_codepage
);
119 result
= n_codepages
;
/* Release the id and name strings of the first n_codepages entries of the
 * codepages table. The per-entry frees run only when n_codepages > 0.
 * NOTE(review): freeing of the table itself is not visible here - confirm. */
126 free_codepages_list (void)
128 if (n_codepages
> 0) {
130 for (i
= 0; i
< n_codepages
; i
++) {
131 g_free (codepages
[i
].id
);
132 g_free (codepages
[i
].name
);
/* Pseudo codepage id: get_codepage_id() returns it for negative indices and
 * get_codepage_index() compares its argument against it first. */
140 #define OTHER_8BIT "Other_8_bit"
/* Map a codepage index to its id string.
 * Returns OTHER_8BIT for a negative n, otherwise codepages[n].id.
 * No upper bound check on n is made here; the caller must pass an index
 * that lies inside the table. */
143 get_codepage_id (const int n
)
145 return (n
< 0) ? OTHER_8BIT
: codepages
[n
].id
;
/* Look up a codepage by its id string.
 * id is compared with OTHER_8BIT first, then the codepages pointer is
 * checked for NULL, and finally the table is scanned with strcmp until an
 * entry with a NULL id is reached.
 * NOTE(review): the return statements are not visible here; presumably the
 * matching index is returned and a negative value otherwise - confirm. */
149 get_codepage_index (const char *id
)
152 if (strcmp (id
, OTHER_8BIT
) == 0)
154 if (codepages
== NULL
)
/* The scan relies on a terminating entry whose id is NULL. */
156 for (i
= 0; codepages
[i
].id
; ++i
)
157 if (strcmp (id
, codepages
[i
].id
) == 0)
/* Convert the single byte c through the conversion descriptor cd by
 * calling g_convert_with_iconv on the address of the parameter.
 * The error argument passed is NULL, so no error details are collected.
 * NOTE(review): how tmp_buff is turned into the result and released is
 * not visible here - confirm. */
163 translate_character (GIConv cd
, char c
)
165 gchar
*tmp_buff
= NULL
;
/* Only bytes_written carries an initialiser; both variables are passed
 * as output arguments below. */
166 gsize bytes_read
, bytes_written
= 0;
167 const char *ibuf
= &c
;
172 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
182 * FIXME: This assumes that ASCII is always the first encoding
/* Build the conv_displ and conv_input byte tables for the codepage pair
 * given by the indices cpsource and cpdisplay.
 *
 * When either index is negative or both are equal, a loop over 0..255 runs
 * and cp_source is set to cp_display (presumably an identity mapping - the
 * loop body is not visible here, confirm).
 * Otherwise a loop over 0..127 runs, cp_source and cp_display are pointed
 * at the id strings of the two codepages, and entries 128..255 of both
 * tables are produced by translate_character() using one iconv descriptor
 * per direction. A failure to open a descriptor is formatted into errbuf. */
188 init_translation_table (int cpsource
, int cpdisplay
)
193 /* Fill input <-> display tables */
195 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
) {
196 for (i
= 0; i
<= 255; ++i
) {
/* Both names now share one pointer; str_nconvert_to_display() and
 * str_nconvert_to_input() test exactly this pointer equality. */
199 cp_source
= cp_display
;
204 for (i
= 0; i
<= 127; ++i
) {
208 cp_source
= (char *) codepages
[cpsource
].id
;
209 cp_display
= (char *) codepages
[cpdisplay
].id
;
211 /* display <- input table */
/* g_iconv_open takes the target charset first: this descriptor converts
 * from cp_source to cp_display. */
213 cd
= g_iconv_open (cp_display
, cp_source
);
214 if (cd
== INVALID_CONV
) {
215 g_snprintf (errbuf
, sizeof (errbuf
),
216 _("Cannot translate from %s to %s"), cp_source
, cp_display
);
220 for (i
= 128; i
<= 255; ++i
)
221 conv_displ
[i
] = translate_character (cd
, i
);
225 /* input <- display table */
/* Opposite direction: from cp_display to cp_source. */
227 cd
= g_iconv_open (cp_source
, cp_display
);
228 if (cd
== INVALID_CONV
) {
229 g_snprintf (errbuf
, sizeof (errbuf
),
230 _("Cannot translate from %s to %s"), cp_display
, cp_source
);
234 for (i
= 128; i
<= 255; ++i
) {
236 ch
= translate_character (cd
, i
);
/* A byte that translates to UNKNCHAR is kept unchanged in the input table. */
237 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
/* Translate str in place for display: the byte at str is replaced by its
 * conv_displ entry. The cast to unsigned char keeps the table index
 * non-negative where plain char is signed.
 * NOTE(review): the iteration over the string is not visible here. */
246 convert_to_display (char *str
)
252 *str
= conv_displ
[(unsigned char) *str
];
/* Wrapper around str_nconvert_to_display() that passes -1 as len
 * (presumably meaning the whole string - confirm against str_nconvert). */
258 str_convert_to_display (char *str
)
260 return str_nconvert_to_display (str
, -1);
/* Convert str (len passed through to str_nconvert) from the source charset
 * for display and deliver the result in a newly created GString.
 * The converter is created with str_crt_conv_from (cp_source) and closed
 * again after the conversion. */
265 str_nconvert_to_display (char *str
, int len
)
/* Early exit with an empty GString; the guarding condition is not visible
 * here - TODO confirm. */
271 return g_string_new("");
/* Pointer comparison, not strcmp: the copy shortcut is taken only when
 * both names are the very same pointer. */
273 if (cp_display
== cp_source
)
274 return g_string_new(str
);
276 conv
= str_crt_conv_from (cp_source
);
278 buff
= g_string_new("");
279 str_nconvert (conv
, str
, len
, buff
);
280 str_close_conv (conv
);
/* Translate str in place from input: the byte at str is replaced by its
 * conv_input entry. The cast to unsigned char keeps the table index
 * non-negative where plain char is signed.
 * NOTE(review): the iteration over the string is not visible here. */
285 convert_from_input (char *str
)
291 *str
= conv_input
[(unsigned char) *str
];
/* Wrapper around str_nconvert_to_input() that passes -1 as len
 * (presumably meaning the whole string - confirm against str_nconvert). */
297 str_convert_to_input (char *str
)
299 return str_nconvert_to_input (str
, -1);
/* Convert str (len passed through to str_nconvert) to the source charset
 * and deliver the result in a newly created GString.
 * The converter is created with str_crt_conv_to (cp_source) and closed
 * again after the conversion. */
303 str_nconvert_to_input (char *str
, int len
)
/* Early exit with an empty GString; the guarding condition is not visible
 * here - TODO confirm. */
309 return g_string_new("");
/* Pointer comparison, not strcmp: the copy shortcut is taken only when
 * both names are the very same pointer. */
311 if (cp_display
== cp_source
)
312 return g_string_new(str
);
314 conv
= str_crt_conv_to (cp_source
);
316 buff
= g_string_new("");
317 str_nconvert (conv
, str
, len
, buff
);
318 str_close_conv (conv
);
/* Translate the character at str into the codepage selected by
 * source_codepage. A converter for that codepage is created with
 * str_crt_conv_to, used once through str_translate_char and closed again.
 * ch starts out as a dot (presumably the fallback result when the
 * conversion is unavailable or fails - the switch cases are not visible
 * here, confirm). */
323 convert_from_utf_to_current (const char *str
)
/* Output buffer for str_translate_char; its size is passed along below. */
325 unsigned char buf_ch
[6 + 1];
326 unsigned char ch
= '.';
333 cp_to
= get_codepage_id ( source_codepage
);
334 conv
= str_crt_conv_to ( cp_to
);
336 if (conv
!= INVALID_CONV
) {
337 switch (str_translate_char (conv
, str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
346 str_close_conv (conv
);
/* Translate the Unicode code point input_char through the caller supplied
 * converter conv. The code point is first encoded as UTF-8 into str and
 * that text is then handed to str_translate_char.
 * ch starts out as a dot (presumably the fallback result - the switch
 * cases are not visible here, confirm). */
354 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
356 unsigned char str
[6 + 1];
357 unsigned char buf_ch
[6 + 1];
358 unsigned char ch
= '.';
/* g_unichar_to_utf8 returns the number of bytes written and does not
 * terminate the output. NOTE(review): confirm that str is terminated
 * before the call below, which passes -1 as the input length. */
362 res
= g_unichar_to_utf8 (input_char
, (char *)str
);
368 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
/* Translate the single byte input_char through the caller supplied
 * converter conv and decode the converted text as one UTF-8 character
 * with g_utf8_get_char_validated.
 * NOTE(review): handling of the other str_translate_char outcomes and of a
 * failed validation is not visible here - confirm. */
381 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
/* Two bytes: the input byte plus, presumably, a terminator whose
 * assignment is not visible here. */
383 unsigned char str
[2];
384 unsigned char buf_ch
[6 + 1];
388 str
[0] = (unsigned char) input_char
;
391 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
393 res
= g_utf8_get_char_validated ((char *)buf_ch
, -1);
/* Variant of convert_from_8bit_to_utf_c() that creates its own converter
 * for the codepage selected by source_codepage, translates the single byte
 * input_char, decodes the converted text with g_utf8_get_char_validated
 * and closes the converter again.
 * NOTE(review): the converter is created with str_crt_conv_to although the
 * local is named cp_from; verify that this is the intended direction. */
409 convert_from_8bit_to_utf_c2 (const char input_char
)
/* Two bytes: the input byte plus, presumably, a terminator whose
 * assignment is not visible here. */
411 unsigned char str
[2];
412 unsigned char buf_ch
[6 + 1];
418 str
[0] = (unsigned char) input_char
;
421 cp_from
= get_codepage_id ( source_codepage
);
422 conv
= str_crt_conv_to (cp_from
);
424 if (conv
!= INVALID_CONV
) {
425 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof(buf_ch
))) {
427 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
439 str_close_conv (conv
);
444 #endif /* HAVE_CHARSET */