2 Text conversion from one charset to another.
4 Copyright (C) 2001-2015
5 Free Software Foundation, Inc.
8 Walery Studennikov <despair@sama.ru>
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
27 * \brief Source: Text conversion from one charset to another
36 #include "lib/global.h"
37 #include "lib/strutil.h" /* utf-8 functions */
38 #include "lib/fileloc.h"
39 #include "lib/charsets.h"
41 /*** global variables ****************************************************************************/
/* List of known codepages; the loaders in this file add codepage_desc pointers to it. */
43 GPtrArray
*codepages
= NULL
;
/* Byte translation table, source codepage -> display codepage (filled by init_translation_table). */
45 unsigned char conv_displ
[256];
/* Byte translation table, display codepage -> source codepage (filled by init_translation_table). */
46 unsigned char conv_input
[256];
/* Id of the display codepage currently selected by init_translation_table(). */
48 const char *cp_display
= NULL
;
/* Id of the source codepage currently selected by init_translation_table(). */
49 const char *cp_source
= NULL
;
51 /*** file scope macro definitions ****************************************************************/
/* init_translation_table() keeps the original byte when translate_character() yields this value. */
53 #define UNKNCHAR '\001'
/* Codepage id used by get_codepage_id() for negative indexes; get_codepage_index() tests for it first. */
55 #define OTHER_8BIT "Other_8_bit"
58 * FIXME: This assumes that ASCII is always the first encoding
63 /*** file scope type declarations ****************************************************************/
65 /*** file scope variables ************************************************************************/
67 /*** file scope functions ************************************************************************/
68 /* --------------------------------------------------------------------------------------------- */
/**
 * Allocate a codepage_desc and fill it with private copies of the given strings.
 * @param id   codepage identifier, duplicated with g_strdup()
 * @param name codepage description, duplicated with g_strdup()
 * NOTE(review): the return statement is not visible in this excerpt; the declared
 * return type is codepage_desc *.
 */
70 static codepage_desc
*
71 new_codepage_desc (const char *id
, const char *name
)
75 desc
= g_new (codepage_desc
, 1);
/* both fields are owned copies, independent of the caller's buffers */
76 desc
->id
= g_strdup (id
);
77 desc
->name
= g_strdup (name
);
82 /* --------------------------------------------------------------------------------------------- */
/**
 * Per-element callback handed to g_ptr_array_foreach() by free_codepages_list().
 * @param data      array element, interpreted as codepage_desc *
 * @param user_data extra argument from g_ptr_array_foreach() (free_codepages_list passes NULL)
 * NOTE(review): the statements that release the descriptor are not visible in this excerpt.
 */
85 free_codepage_desc (gpointer data
, gpointer user_data
)
87 codepage_desc
*desc
= (codepage_desc
*) data
;
95 /* --------------------------------------------------------------------------------------------- */
/**
 * Read codepage definitions from a text file, one "<id> <description>" entry per line.
 * @param list  in/out: array of codepage_desc; created here when *list is still NULL
 * @param fname path of the file opened with fopen (fname, "r")
 * An entry whose id is "default" is not added to the list; its value is looked up with
 * get_codepage_index() and stored in mc_global.display_codepage.
 * NOTE(review): several statements (error paths, fclose, loop bodies) are not visible
 * in this excerpt.
 */
96 /* stores the display codepage in mc_global.display_codepage when a "default" entry is present */
99 load_codepages_list_from_file (GPtrArray
** list
, const char *fname
)
102 char buf
[BUF_MEDIUM
];
103 char *default_codepage
= NULL
;
105 f
= fopen (fname
, "r");
/* process the file line by line; each line is split in place inside buf */
109 while (fgets (buf
, sizeof buf
, f
) != NULL
)
111 /* split string into id and cpname */
113 size_t buflen
= strlen (buf
);
/* test for empty lines and '#' comment lines (the action taken is not visible in this excerpt) */
115 if (*p
== '\n' || *p
== '\0' || *p
== '#')
/* drop the trailing newline that fgets() keeps */
118 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
119 buf
[buflen
- 1] = '\0';
/* look for the first tab or space, i.e. the end of the id field */
120 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
/* "default" is a keyword entry: remember its value instead of adding a codepage */
/* NOTE(review): a second "default" line would overwrite this pointer; no g_free is visible here */
130 if (strcmp (buf
, "default") == 0)
131 default_codepage
= g_strdup (p
);
134 const char *id
= buf
;
/* create the list with room for 16 entries and add the entry */
138 *list
= g_ptr_array_sized_new (16);
139 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
145 /* whether id is already present in list */
146 /* if yes, overwrite description */
147 for (i
= 0; i
< (*list
)->len
; i
++)
151 desc
= (codepage_desc
*) g_ptr_array_index (*list
, i
);
153 if (strcmp (id
, desc
->id
) == 0)
/* NOTE(review): the release of the previous desc->name is not visible in this excerpt */
157 desc
->name
= g_strdup (p
);
/* the loop reached the end of the list: append a new entry */
163 if (i
== (*list
)->len
)
164 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
/* resolve the remembered "default" id to an index, then release the temporary copy */
169 if (default_codepage
!= NULL
)
171 mc_global
.display_codepage
= get_codepage_index (default_codepage
);
172 g_free (default_codepage
);
179 /* --------------------------------------------------------------------------------------------- */
/**
 * Convert one byte through an already opened iconv descriptor.
 * @param cd conversion descriptor handed to g_convert_with_iconv()
 * @param c  byte to convert; its address serves as the input buffer
 * NOTE(review): the handling of the conversion result and the return statement are
 * not visible in this excerpt.
 */
182 translate_character (GIConv cd
, char c
)
184 gchar
*tmp_buff
= NULL
;
/* only bytes_written is initialised by this declaration */
185 gsize bytes_read
, bytes_written
= 0;
186 const char *ibuf
= &c
;
/* the last argument is NULL, so no GError is requested from GLib */
191 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
198 /* --------------------------------------------------------------------------------------------- */
199 /*** public functions ****************************************************************************/
200 /* --------------------------------------------------------------------------------------------- */
/**
 * Fill the global codepages list from the CHARSETS_LIST file in the share data directory
 * and then from the one in the system configuration directory. Both calls go through
 * load_codepages_list_from_file() with the same list.
 * If codepages is still NULL afterwards, a warning is printed to stderr and a list holding
 * only DEFAULT_CHARSET is created.
 */
203 load_codepages_list (void)
207 /* 1: try to load /usr/share/mc/mc.charsets */
208 fname
= g_build_filename (mc_global
.share_data_dir
, CHARSETS_LIST
, (char *) NULL
);
209 load_codepages_list_from_file (&codepages
, fname
);
212 /* 2: try to load /etc/mc/mc.charsets */
213 fname
= g_build_filename (mc_global
.sysconfig_dir
, CHARSETS_LIST
, (char *) NULL
);
214 load_codepages_list_from_file (&codepages
, fname
);
217 if (codepages
== NULL
)
219 /* files are not found, add default codepage */
220 fprintf (stderr
, "%s\n", _("Warning: cannot load codepages list"));
222 codepages
= g_ptr_array_new ();
223 g_ptr_array_add (codepages
, new_codepage_desc (DEFAULT_CHARSET
, _("7-bit ASCII")));
227 /* --------------------------------------------------------------------------------------------- */
/**
 * Release the global codepages list: free_codepage_desc() is run on every element,
 * then the array itself is freed.
 * NOTE(review): no reset of the codepages pointer is visible in this excerpt.
 */
230 free_codepages_list (void)
232 g_ptr_array_foreach (codepages
, free_codepage_desc
, NULL
);
233 g_ptr_array_free (codepages
, TRUE
);
236 /* --------------------------------------------------------------------------------------------- */
/**
 * Map a codepage index to its id string.
 * @param n index into codepages; any negative value selects OTHER_8BIT
 * @return OTHER_8BIT for n < 0, otherwise the id field of element n of codepages
 * NOTE(review): no upper-bound check on n and no NULL check on codepages are visible
 * here — confirm that callers guarantee both.
 */
239 get_codepage_id (const int n
)
241 return (n
< 0) ? OTHER_8BIT
: ((codepage_desc
*) g_ptr_array_index (codepages
, n
))->id
;
244 /* --------------------------------------------------------------------------------------------- */
/**
 * Look up a codepage by its id.
 * @param id codepage id; compared with strcmp(), i.e. case-sensitively
 * NOTE(review): the values returned on each path are not visible in this excerpt.
 */
247 get_codepage_index (const char *id
)
/* the pseudo codepage OTHER_8BIT is tested before the list is consulted */
250 if (strcmp (id
, OTHER_8BIT
) == 0)
/* guard for a list that has not been loaded */
252 if (codepages
== NULL
)
/* linear search over all known codepages */
254 for (i
= 0; i
< codepages
->len
; i
++)
255 if (strcmp (id
, ((codepage_desc
*) g_ptr_array_index (codepages
, i
))->id
) == 0)
260 /* --------------------------------------------------------------------------------------------- */
261 /** Check if specified encoding can be used in mc.
262 * @param encoding name of encoding
263 * @return TRUE if encoding is supported by mc, FALSE otherwise
 * Every known codepage id is compared with g_ascii_strncasecmp() over strlen (id)
 * characters, so the test is a case-insensitive prefix match: an encoding name that
 * merely starts with a known id is accepted. The per-codepage results are OR-ed
 * together over the whole list.
 * NOTE(review): any NULL checks on encoding or codepages are not visible in this excerpt.
267 is_supported_encoding (const char *encoding
)
269 gboolean result
= FALSE
;
272 for (t
= 0; t
< codepages
->len
; t
++)
274 const char *id
= ((codepage_desc
*) g_ptr_array_index (codepages
, t
))->id
;
275 result
|= (g_ascii_strncasecmp (encoding
, id
, strlen (id
)) == 0);
281 /* --------------------------------------------------------------------------------------------- */
/**
 * Build the conv_displ and conv_input byte tables for a pair of codepages and record
 * their ids in cp_source and cp_display.
 * @param cpsource  index of the source codepage in codepages
 * @param cpdisplay index of the display codepage in codepages
 * @return a newly allocated message from g_strdup_printf() when an iconv descriptor
 *         cannot be opened; the value returned on success is not visible in this excerpt
 */
284 init_translation_table (int cpsource
, int cpdisplay
)
289 /* Fill input <-> display tables */
/* negative or identical indexes: no conversion is set up and cp_source aliases cp_display */
291 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
)
/* loop over all 256 byte values (body not visible in this excerpt) */
293 for (i
= 0; i
<= 255; ++i
)
297 cp_source
= cp_display
;
/* loop over the lower 128 byte values (body not visible in this excerpt) */
302 for (i
= 0; i
<= 127; ++i
)
/* the ids point into the codepage_desc entries; they are not copied */
307 cp_source
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpsource
))->id
;
308 cp_display
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpdisplay
))->id
;
310 /* display <- input table */
312 cd
= g_iconv_open (cp_display
, cp_source
);
313 if (cd
== INVALID_CONV
)
314 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source
, cp_display
);
/* only the upper half (128..255) is translated through iconv */
316 for (i
= 128; i
<= 255; ++i
)
317 conv_displ
[i
] = translate_character (cd
, i
);
321 /* input <- display table */
323 cd
= g_iconv_open (cp_source
, cp_display
);
324 if (cd
== INVALID_CONV
)
325 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display
, cp_source
);
327 for (i
= 128; i
<= 255; ++i
)
330 ch
= translate_character (cd
, i
);
/* a byte that translates to UNKNCHAR is mapped to itself */
331 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
339 /* --------------------------------------------------------------------------------------------- */
/**
 * Translate str in place through the conv_displ table.
 * @param str buffer to modify; each byte is used as an unsigned index into conv_displ
 * NOTE(review): the loop that walks the string is not visible in this excerpt.
 */
342 convert_to_display (char *str
)
/* the cast keeps bytes >= 0x80 from becoming negative indexes */
349 *str
= conv_displ
[(unsigned char) *str
];
354 /* --------------------------------------------------------------------------------------------- */
/**
 * Convenience wrapper: calls str_nconvert_to_display() with a length of -1.
 * @param str text to convert
 * @return whatever str_nconvert_to_display (str, -1) returns
 */
357 str_convert_to_display (char *str
)
359 return str_nconvert_to_display (str
, -1);
363 /* --------------------------------------------------------------------------------------------- */
/**
 * Convert text from the source codepage for display, returning a new GString.
 * @param str text to convert
 * @param len length forwarded to str_nconvert()
 * NOTE(review): the condition guarding the first return and the final return statement
 * are not visible in this excerpt.
 */
366 str_nconvert_to_display (char *str
, int len
)
372 return g_string_new ("");
/* pointer comparison, not strcmp: init_translation_table() makes cp_source alias cp_display
   when no conversion is needed */
/* NOTE(review): on this path the whole of str is copied; len is not applied */
374 if (cp_display
== cp_source
)
375 return g_string_new (str
);
377 conv
= str_crt_conv_from (cp_source
);
379 buff
= g_string_new ("");
380 str_nconvert (conv
, str
, len
, buff
);
381 str_close_conv (conv
);
385 /* --------------------------------------------------------------------------------------------- */
/**
 * Translate str in place through the conv_input table.
 * @param str buffer to modify; each byte is used as an unsigned index into conv_input
 * NOTE(review): the loop that walks the string is not visible in this excerpt.
 */
388 convert_from_input (char *str
)
/* the cast keeps bytes >= 0x80 from becoming negative indexes */
395 *str
= conv_input
[(unsigned char) *str
];
400 /* --------------------------------------------------------------------------------------------- */
/**
 * Convenience wrapper: calls str_nconvert_to_input() with a length of -1.
 * @param str text to convert
 * @return whatever str_nconvert_to_input (str, -1) returns
 */
403 str_convert_to_input (char *str
)
405 return str_nconvert_to_input (str
, -1);
408 /* --------------------------------------------------------------------------------------------- */
/**
 * Convert text towards the source codepage, returning a new GString.
 * @param str text to convert
 * @param len length forwarded to str_nconvert()
 * NOTE(review): the condition guarding the first return and the final return statement
 * are not visible in this excerpt.
 */
411 str_nconvert_to_input (char *str
, int len
)
417 return g_string_new ("");
/* pointer comparison, not strcmp: init_translation_table() makes cp_source alias cp_display
   when no conversion is needed */
/* NOTE(review): on this path the whole of str is copied; len is not applied */
419 if (cp_display
== cp_source
)
420 return g_string_new (str
);
422 conv
= str_crt_conv_to (cp_source
);
424 buff
= g_string_new ("");
425 str_nconvert (conv
, str
, len
, buff
);
426 str_close_conv (conv
);
430 /* --------------------------------------------------------------------------------------------- */
/**
 * Translate the character at str into the codepage named by mc_global.source_codepage.
 * @param str input text handed to str_translate_char()
 * A converter is created and closed on every call.
 * NOTE(review): the switch cases and the return statement are not visible in this excerpt.
 */
433 convert_from_utf_to_current (const char *str
)
435 unsigned char buf_ch
[UTF8_CHAR_LEN
+ 1];
/* '.' is the initial value of the result byte */
436 unsigned char ch
= '.';
443 cp_to
= get_codepage_id (mc_global
.source_codepage
);
444 conv
= str_crt_conv_to (cp_to
);
446 if (conv
!= INVALID_CONV
)
448 switch (str_translate_char (conv
, str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
460 str_close_conv (conv
);
466 /* --------------------------------------------------------------------------------------------- */
/**
 * Translate one Unicode code point through a caller-supplied converter.
 * @param input_char code point, first encoded as UTF-8 with g_unichar_to_utf8()
 * @param conv       converter handed to str_translate_char(); it is not closed in the
 *                   visible code
 * NOTE(review): the handling of res, the switch cases and the return statement are not
 * visible in this excerpt.
 */
469 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
471 unsigned char str
[UTF8_CHAR_LEN
+ 1];
472 unsigned char buf_ch
[UTF8_CHAR_LEN
+ 1];
/* '.' is the initial value of the result byte */
473 unsigned char ch
= '.';
476 res
= g_unichar_to_utf8 (input_char
, (char *) str
);
482 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
498 /* --------------------------------------------------------------------------------------------- */
/**
 * Translate one 8-bit character to a Unicode code point through a caller-supplied converter.
 * @param input_char byte to translate
 * @param conv       converter handed to str_translate_char()
 * NOTE(review): the termination of str, the switch cases and the return statement are
 * not visible in this excerpt.
 */
501 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
503 unsigned char str
[2];
504 unsigned char buf_ch
[UTF8_CHAR_LEN
+ 1];
507 str
[0] = (unsigned char) input_char
;
510 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
/* decode the converted bytes; when validation reports an error (negative res) the first raw byte is used */
516 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
517 ch
= res
>= 0 ? res
: buf_ch
[0];
530 /* --------------------------------------------------------------------------------------------- */
/**
 * Translate one 8-bit character to a Unicode code point, using a converter built from
 * the codepage named by mc_global.source_codepage. The converter is created and closed
 * on every call.
 * @param input_char byte to translate
 * NOTE(review): the termination of str, the switch cases and the return statement are
 * not visible in this excerpt.
 */
533 convert_from_8bit_to_utf_c2 (const char input_char
)
535 unsigned char str
[2];
540 str
[0] = (unsigned char) input_char
;
543 cp_from
= get_codepage_id (mc_global
.source_codepage
);
/* NOTE(review): the id is named cp_from but is passed to str_crt_conv_to() — confirm the
   intended conversion direction */
544 conv
= str_crt_conv_to (cp_from
);
546 if (conv
!= INVALID_CONV
)
548 unsigned char buf_ch
[UTF8_CHAR_LEN
+ 1];
550 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
/* decode the converted bytes; when validation reports an error (negative res) the first raw byte is used */
556 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
557 ch
= res
>= 0 ? res
: buf_ch
[0];
566 str_close_conv (conv
);
572 /* --------------------------------------------------------------------------------------------- */