1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * \brief Source: Text conversion from one charset to another
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
41 struct codepage_desc
*codepages
;
43 unsigned char conv_displ
[256];
44 unsigned char conv_input
[256];
46 const char *cp_display
= NULL
;
47 const char *cp_source
= NULL
;
51 load_codepages_list (void)
57 char *default_codepage
= NULL
;
59 fname
= concat_dir_and_file (mc_home
, CHARSETS_INDEX
);
60 f
= fopen (fname
, "r");
62 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
65 fname
= concat_dir_and_file (mc_home_alt
, CHARSETS_INDEX
);
66 f
= fopen (fname
, "r");
68 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
71 /* file is not found, add default codepage */
73 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
74 codepages
[0].id
= g_strdup ("ASCII");
75 codepages
[0].name
= g_strdup (_("7-bit ASCII"));
81 for (n_codepages
= 0; fgets (buf
, sizeof (buf
), f
);)
82 if (buf
[0] != '\n' && buf
[0] != '\0' && buf
[0] != '#')
86 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
88 for (n_codepages
= 0; fgets (buf
, sizeof buf
, f
);) {
89 /* split string into id and cpname */
91 size_t buflen
= strlen (buf
);
93 if (*p
== '\n' || *p
== '\0' || *p
== '#')
96 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
97 buf
[buflen
- 1] = '\0';
98 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
108 if (strcmp (buf
, "default") == 0)
109 default_codepage
= g_strdup (p
);
111 codepages
[n_codepages
].id
= g_strdup (buf
);
112 codepages
[n_codepages
].name
= g_strdup (p
);
117 if (default_codepage
!= NULL
) {
118 display_codepage
= get_codepage_index (default_codepage
);
119 g_free (default_codepage
);
122 result
= n_codepages
;
129 free_codepages_list (void)
131 if (n_codepages
> 0) {
133 for (i
= 0; i
< n_codepages
; i
++) {
134 g_free (codepages
[i
].id
);
135 g_free (codepages
[i
].name
);
143 #define OTHER_8BIT "Other_8_bit"
146 get_codepage_id (const int n
)
148 return (n
< 0) ? OTHER_8BIT
: codepages
[n
].id
;
152 get_codepage_index (const char *id
)
155 if (strcmp (id
, OTHER_8BIT
) == 0)
157 if (codepages
== NULL
)
159 for (i
= 0; i
< n_codepages
; i
++)
160 if (strcmp (id
, codepages
[i
].id
) == 0)
165 /** Check if specified encoding can be used in mc.
166 * @param encoding name of encoding
167 * @returns TRUE if encoding is supported by mc, FALSE otherwise
170 is_supported_encoding (const char *encoding
)
172 gboolean result
= FALSE
;
175 for (t
= 0; t
< (size_t) n_codepages
; t
++)
176 result
|= (g_ascii_strncasecmp (encoding
, codepages
[t
].id
,
177 strlen (codepages
[t
].id
)) == 0);
183 translate_character (GIConv cd
, char c
)
185 gchar
*tmp_buff
= NULL
;
186 gsize bytes_read
, bytes_written
= 0;
187 const char *ibuf
= &c
;
192 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
200 * FIXME: This assumes that ASCII is always the first encoding
206 init_translation_table (int cpsource
, int cpdisplay
)
211 /* Fill input <-> display tables */
213 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
) {
214 for (i
= 0; i
<= 255; ++i
) {
217 cp_source
= cp_display
;
222 for (i
= 0; i
<= 127; ++i
) {
226 cp_source
= (char *) codepages
[cpsource
].id
;
227 cp_display
= (char *) codepages
[cpdisplay
].id
;
229 /* display <- input table */
231 cd
= g_iconv_open (cp_display
, cp_source
);
232 if (cd
== INVALID_CONV
)
233 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source
, cp_display
);
235 for (i
= 128; i
<= 255; ++i
)
236 conv_displ
[i
] = translate_character (cd
, i
);
240 /* input <- display table */
242 cd
= g_iconv_open (cp_source
, cp_display
);
243 if (cd
== INVALID_CONV
)
244 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display
, cp_source
);
246 for (i
= 128; i
<= 255; ++i
) {
248 ch
= translate_character (cd
, i
);
249 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
258 convert_to_display (char *str
)
264 *str
= conv_displ
[(unsigned char) *str
];
270 str_convert_to_display (char *str
)
272 return str_nconvert_to_display (str
, -1);
277 str_nconvert_to_display (char *str
, int len
)
283 return g_string_new("");
285 if (cp_display
== cp_source
)
286 return g_string_new(str
);
288 conv
= str_crt_conv_from (cp_source
);
290 buff
= g_string_new("");
291 str_nconvert (conv
, str
, len
, buff
);
292 str_close_conv (conv
);
297 convert_from_input (char *str
)
303 *str
= conv_input
[(unsigned char) *str
];
309 str_convert_to_input (char *str
)
311 return str_nconvert_to_input (str
, -1);
315 str_nconvert_to_input (char *str
, int len
)
321 return g_string_new("");
323 if (cp_display
== cp_source
)
324 return g_string_new(str
);
326 conv
= str_crt_conv_to (cp_source
);
328 buff
= g_string_new("");
329 str_nconvert (conv
, str
, len
, buff
);
330 str_close_conv (conv
);
335 convert_from_utf_to_current (const char *str
)
337 unsigned char buf_ch
[6 + 1];
338 unsigned char ch
= '.';
345 cp_to
= get_codepage_id ( source_codepage
);
346 conv
= str_crt_conv_to ( cp_to
);
348 if (conv
!= INVALID_CONV
) {
349 switch (str_translate_char (conv
, str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
358 str_close_conv (conv
);
366 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
368 unsigned char str
[6 + 1];
369 unsigned char buf_ch
[6 + 1];
370 unsigned char ch
= '.';
374 res
= g_unichar_to_utf8 (input_char
, (char *)str
);
380 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
393 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
395 unsigned char str
[2];
396 unsigned char buf_ch
[6 + 1];
400 str
[0] = (unsigned char) input_char
;
403 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
405 res
= g_utf8_get_char_validated ((char *)buf_ch
, -1);
421 convert_from_8bit_to_utf_c2 (const char input_char
)
423 unsigned char str
[2];
424 unsigned char buf_ch
[6 + 1];
430 str
[0] = (unsigned char) input_char
;
433 cp_from
= get_codepage_id ( source_codepage
);
434 conv
= str_crt_conv_to (cp_from
);
436 if (conv
!= INVALID_CONV
) {
437 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof(buf_ch
))) {
439 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
451 str_close_conv (conv
);
456 #endif /* HAVE_CHARSET */