1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * \brief Source: Text conversion from one charset to another
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
/* Array of codepage_desc entries; NULL until load_codepages_list() fills it,
 * released by free_codepages_list(). */
39 GPtrArray
*codepages
= NULL
;
/* Byte translation table applied by convert_to_display(); filled in
 * init_translation_table() using the cp_source -> cp_display converter. */
41 unsigned char conv_displ
[256];
/* Byte translation table applied by convert_from_input(); filled in
 * init_translation_table() using the cp_display -> cp_source converter. */
42 unsigned char conv_input
[256];
/* Id of the display codepage; assigned in init_translation_table(). */
44 const char *cp_display
= NULL
;
/* Id of the source codepage; assigned in init_translation_table().
 * The str_nconvert_* functions compare it with cp_display by pointer. */
45 const char *cp_source
= NULL
;
47 static codepage_desc
*
48 new_codepage_desc (const char *id
, const char *name
)
52 desc
= g_new (codepage_desc
, 1);
53 desc
->id
= g_strdup (id
);
54 desc
->name
= g_strdup (name
);
60 free_codepage_desc (gpointer data
, gpointer user_data
)
62 codepage_desc
*desc
= (codepage_desc
*) data
;
70 /* returns display codepage */
72 load_codepages_list_from_file (GPtrArray
**list
, const char *fname
)
77 char *default_codepage
= NULL
;
79 f
= fopen (fname
, "r");
83 for (i
= 0; fgets (buf
, sizeof buf
, f
) != NULL
; )
85 /* split string into id and cpname */
87 size_t buflen
= strlen (buf
);
89 if (*p
== '\n' || *p
== '\0' || *p
== '#')
92 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
93 buf
[buflen
- 1] = '\0';
94 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
104 if (strcmp (buf
, "default") == 0)
105 default_codepage
= g_strdup (p
);
108 const char *id
= buf
;
112 *list
= g_ptr_array_sized_new (16);
113 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
117 /* whether id is already present in list */
118 /* if yes, overwrite description */
119 for (i
= 0; i
< (*list
)->len
; i
++)
123 desc
= (codepage_desc
*) g_ptr_array_index (*list
, i
);
125 if (strcmp (id
, desc
->id
) == 0)
129 desc
->name
= g_strdup (p
);
135 if (i
== (*list
)->len
)
136 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
141 if (default_codepage
!= NULL
)
143 display_codepage
= get_codepage_index (default_codepage
);
144 g_free (default_codepage
);
152 load_codepages_list (void)
156 /* 1: try load /usr/share/mc/mc.charsets */
157 fname
= g_build_filename (mc_home_alt
, CHARSETS_LIST
, (char *) NULL
);
158 load_codepages_list_from_file (&codepages
, fname
);
161 /* 2: try load /etc/mc/mc.charsets */
162 fname
= g_build_filename (mc_home
, CHARSETS_LIST
, (char *) NULL
);
163 load_codepages_list_from_file (&codepages
, fname
);
166 if (codepages
== NULL
)
168 /* files are not found, add default codepage */
169 fprintf (stderr
, "%s\n", _("Warning: cannot load codepages list"));
171 codepages
= g_ptr_array_new ();
172 g_ptr_array_add (codepages
, new_codepage_desc ("ASCII", _("7-bit ASCII")));
177 free_codepages_list (void)
179 g_ptr_array_foreach (codepages
, free_codepage_desc
, NULL
);
180 g_ptr_array_free (codepages
, TRUE
);
183 #define OTHER_8BIT "Other_8_bit"
186 get_codepage_id (const int n
)
188 return (n
< 0) ? OTHER_8BIT
: ((codepage_desc
*) g_ptr_array_index (codepages
, n
))->id
;
192 get_codepage_index (const char *id
)
195 if (strcmp (id
, OTHER_8BIT
) == 0)
197 if (codepages
== NULL
)
199 for (i
= 0; i
< codepages
->len
; i
++)
200 if (strcmp (id
, ((codepage_desc
*) g_ptr_array_index (codepages
, i
))->id
) == 0)
205 /** Check if specified encoding can be used in mc.
206 * @param encoding name of encoding
207 * @returns TRUE if encoding is supported by mc, FALSE otherwise
210 is_supported_encoding (const char *encoding
)
212 gboolean result
= FALSE
;
215 for (t
= 0; t
< codepages
->len
; t
++)
217 const char *id
= ((codepage_desc
*) g_ptr_array_index (codepages
, t
))->id
;
218 result
|= (g_ascii_strncasecmp (encoding
, id
, strlen (id
)) == 0);
225 translate_character (GIConv cd
, char c
)
227 gchar
*tmp_buff
= NULL
;
228 gsize bytes_read
, bytes_written
= 0;
229 const char *ibuf
= &c
;
234 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
242 * FIXME: This assumes that ASCII is always the first encoding
248 init_translation_table (int cpsource
, int cpdisplay
)
253 /* Fill input <-> display tables */
255 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
) {
256 for (i
= 0; i
<= 255; ++i
) {
259 cp_source
= cp_display
;
264 for (i
= 0; i
<= 127; ++i
) {
268 cp_source
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpsource
))->id
;
269 cp_display
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpdisplay
))->id
;
271 /* display <- input table */
273 cd
= g_iconv_open (cp_display
, cp_source
);
274 if (cd
== INVALID_CONV
)
275 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source
, cp_display
);
277 for (i
= 128; i
<= 255; ++i
)
278 conv_displ
[i
] = translate_character (cd
, i
);
282 /* input <- display table */
284 cd
= g_iconv_open (cp_source
, cp_display
);
285 if (cd
== INVALID_CONV
)
286 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display
, cp_source
);
288 for (i
= 128; i
<= 255; ++i
) {
290 ch
= translate_character (cd
, i
);
291 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
300 convert_to_display (char *str
)
306 *str
= conv_displ
[(unsigned char) *str
];
312 str_convert_to_display (char *str
)
314 return str_nconvert_to_display (str
, -1);
319 str_nconvert_to_display (char *str
, int len
)
325 return g_string_new("");
327 if (cp_display
== cp_source
)
328 return g_string_new(str
);
330 conv
= str_crt_conv_from (cp_source
);
332 buff
= g_string_new("");
333 str_nconvert (conv
, str
, len
, buff
);
334 str_close_conv (conv
);
339 convert_from_input (char *str
)
345 *str
= conv_input
[(unsigned char) *str
];
351 str_convert_to_input (char *str
)
353 return str_nconvert_to_input (str
, -1);
357 str_nconvert_to_input (char *str
, int len
)
363 return g_string_new("");
365 if (cp_display
== cp_source
)
366 return g_string_new(str
);
368 conv
= str_crt_conv_to (cp_source
);
370 buff
= g_string_new("");
371 str_nconvert (conv
, str
, len
, buff
);
372 str_close_conv (conv
);
377 convert_from_utf_to_current (const char *str
)
379 unsigned char buf_ch
[6 + 1];
380 unsigned char ch
= '.';
387 cp_to
= get_codepage_id ( source_codepage
);
388 conv
= str_crt_conv_to ( cp_to
);
390 if (conv
!= INVALID_CONV
) {
391 switch (str_translate_char (conv
, str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
400 str_close_conv (conv
);
408 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
410 unsigned char str
[6 + 1];
411 unsigned char buf_ch
[6 + 1];
412 unsigned char ch
= '.';
416 res
= g_unichar_to_utf8 (input_char
, (char *)str
);
422 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
435 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
437 unsigned char str
[2];
438 unsigned char buf_ch
[6 + 1];
442 str
[0] = (unsigned char) input_char
;
445 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
447 res
= g_utf8_get_char_validated ((char *)buf_ch
, -1);
463 convert_from_8bit_to_utf_c2 (const char input_char
)
465 unsigned char str
[2];
466 unsigned char buf_ch
[6 + 1];
472 str
[0] = (unsigned char) input_char
;
475 cp_from
= get_codepage_id ( source_codepage
);
476 conv
= str_crt_conv_to (cp_from
);
478 if (conv
!= INVALID_CONV
) {
479 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof(buf_ch
))) {
481 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
493 str_close_conv (conv
);
498 #endif /* HAVE_CHARSET */