1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * \brief Source: Text conversion from one charset to another
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
35 #include "lib/charsets.h"
37 /*** global variables ****************************************************************************/
/* List of known codepages; its elements are read back as codepage_desc pointers.
   Stays NULL until a list has been loaded. */
39 GPtrArray
*codepages
= NULL
;
/* 256-entry byte translation tables indexed by unsigned char value:
   conv_displ is applied by convert_to_display(), conv_input by convert_from_input(). */
41 unsigned char conv_displ
[256];
42 unsigned char conv_input
[256];
/* Ids of the display and source codepages; both are assigned by init_translation_table(). */
44 const char *cp_display
= NULL
;
45 const char *cp_source
= NULL
;
47 /*** file scope macro definitions ****************************************************************/
/* Placeholder codepage id: get_codepage_id() returns it for a negative index and
   get_codepage_index() compares its argument against it. */
49 #define OTHER_8BIT "Other_8_bit"
52 * FIXME: This assumes that ASCII is always the first encoding
57 /*** file scope type declarations ****************************************************************/
59 /*** file scope variables ************************************************************************/
61 /*** file scope functions ************************************************************************/
62 /* --------------------------------------------------------------------------------------------- */
/* Create a codepage descriptor for the given id and human-readable name.
   The descriptor is allocated with g_new() and stores g_strdup() copies of both
   strings, so the caller keeps ownership of its arguments. */
64 static codepage_desc
*
65 new_codepage_desc (const char *id
, const char *name
)
69 desc
= g_new (codepage_desc
, 1);
/* private copies: the descriptor does not alias the caller's buffers */
70 desc
->id
= g_strdup (id
);
71 desc
->name
= g_strdup (name
);
76 /* --------------------------------------------------------------------------------------------- */
/* Per-element callback passed to g_ptr_array_foreach() by free_codepages_list().
   data is the array element and is treated as a codepage_desc pointer.
   NOTE(review): presumably releases id, name and the descriptor itself; confirm
   against the full function body. */
79 free_codepage_desc (gpointer data
, gpointer user_data
)
81 codepage_desc
*desc
= (codepage_desc
*) data
;
89 /* --------------------------------------------------------------------------------------------- */
90 /* Read codepage descriptions from the text file fname into *list.
   Each line holds an id followed by a description; a line whose first field is
   "default" names the display codepage, which is stored in
   mc_global.display_codepage after the whole file has been read. */
93 load_codepages_list_from_file (GPtrArray
** list
, const char *fname
)
98 char *default_codepage
= NULL
;
100 f
= fopen (fname
, "r");
/* i is not advanced by the loop header; it is reused as the search index below */
104 for (i
= 0; fgets (buf
, sizeof buf
, f
) != NULL
;)
106 /* split string into id and cpname */
108 size_t buflen
= strlen (buf
);
/* test for empty lines and '#' comment lines */
110 if (*p
== '\n' || *p
== '\0' || *p
== '#')
/* drop the trailing newline kept by fgets() */
113 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
114 buf
[buflen
- 1] = '\0';
/* scan to the end of the first field (tab, space or end of string) */
115 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
/* "default" entry: remember the codepage id it names.
   NOTE(review): a second "default" line would overwrite this pointer without
   freeing the previous copy; confirm whether that can happen. */
125 if (strcmp (buf
, "default") == 0)
126 default_codepage
= g_strdup (p
);
129 const char *id
= buf
;
/* create the list and add the entry */
133 *list
= g_ptr_array_sized_new (16);
134 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
138 /* whether id is already present in list */
139 /* if yes, overwrite description */
140 for (i
= 0; i
< (*list
)->len
; i
++)
144 desc
= (codepage_desc
*) g_ptr_array_index (*list
, i
);
146 if (strcmp (id
, desc
->id
) == 0)
150 desc
->name
= g_strdup (p
);
/* i == len means the search loop ran to completion: id is new, append it */
156 if (i
== (*list
)->len
)
157 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
/* resolve the "default" id to an index only after all entries are known */
162 if (default_codepage
!= NULL
)
164 mc_global
.display_codepage
= get_codepage_index (default_codepage
);
165 g_free (default_codepage
);
172 /* --------------------------------------------------------------------------------------------- */
/* Convert the single character c through the iconv descriptor cd.
   The conversion is done with g_convert_with_iconv(); no GError is requested
   (last argument is NULL), so failure details are not collected. */
175 translate_character (GIConv cd
, char c
)
177 gchar
*tmp_buff
= NULL
;
178 gsize bytes_read
, bytes_written
= 0;
/* the input buffer is the parameter itself */
179 const char *ibuf
= &c
;
184 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
191 /* --------------------------------------------------------------------------------------------- */
192 /*** public functions ****************************************************************************/
193 /* --------------------------------------------------------------------------------------------- */
/* Populate the global codepages list.
   The CHARSETS_LIST file is loaded first from mc_global.share_data_dir and then
   from mc_global.sysconfig_dir, both into the same list. If the list is still
   NULL afterwards, a warning is printed to stderr and a single "ASCII" entry
   is installed. */
196 load_codepages_list (void)
200 /* 1: try load /usr/share/mc/mc.charsets */
201 fname
= g_build_filename (mc_global
.share_data_dir
, CHARSETS_LIST
, (char *) NULL
);
202 load_codepages_list_from_file (&codepages
, fname
);
205 /* 2: try load /etc/mc/mc.charsets */
206 fname
= g_build_filename (mc_global
.sysconfig_dir
, CHARSETS_LIST
, (char *) NULL
);
207 load_codepages_list_from_file (&codepages
, fname
);
210 if (codepages
== NULL
)
212 /* files are not found, add default codepage */
213 fprintf (stderr
, "%s\n", _("Warning: cannot load codepages list"));
215 codepages
= g_ptr_array_new ();
216 g_ptr_array_add (codepages
, new_codepage_desc ("ASCII", _("7-bit ASCII")));
220 /* --------------------------------------------------------------------------------------------- */
/* Destroy the global codepages list: free_codepage_desc() is invoked for every
   element, then the array is released with g_ptr_array_free (codepages, TRUE). */
223 free_codepages_list (void)
225 g_ptr_array_foreach (codepages
, free_codepage_desc
, NULL
);
226 g_ptr_array_free (codepages
, TRUE
);
229 /* --------------------------------------------------------------------------------------------- */
/* Return the id string of the n-th codepage, or OTHER_8BIT when n is negative.
   NOTE(review): n is not checked against codepages->len and codepages is not
   checked for NULL here; callers must pass a valid index. */
232 get_codepage_id (const int n
)
234 return (n
< 0) ? OTHER_8BIT
: ((codepage_desc
*) g_ptr_array_index (codepages
, n
))->id
;
237 /* --------------------------------------------------------------------------------------------- */
/* Look up a codepage by its id string.
   The id is first compared with OTHER_8BIT, then the case of an unloaded list
   (codepages == NULL) is tested, and finally the list is searched linearly with
   an exact, case-sensitive strcmp(). */
240 get_codepage_index (const char *id
)
243 if (strcmp (id
, OTHER_8BIT
) == 0)
245 if (codepages
== NULL
)
247 for (i
= 0; i
< codepages
->len
; i
++)
248 if (strcmp (id
, ((codepage_desc
*) g_ptr_array_index (codepages
, i
))->id
) == 0)
253 /* --------------------------------------------------------------------------------------------- */
254 /** Check if specified encoding can be used in mc.
255 * @param encoding name of encoding
256 * @returns TRUE if encoding is supported by mc, FALSE otherwise
/* Test encoding against every id in the global codepages list.
   The comparison is case-insensitive and covers only the first strlen (id)
   characters, so an encoding name that merely begins with a known id also
   matches. The loop accumulates with |= and does not stop at the first match.
   NOTE(review): codepages is dereferenced in the loop condition; confirm that
   callers cannot reach this before the list has been loaded. */
260 is_supported_encoding (const char *encoding
)
262 gboolean result
= FALSE
;
265 for (t
= 0; t
< codepages
->len
; t
++)
267 const char *id
= ((codepage_desc
*) g_ptr_array_index (codepages
, t
))->id
;
268 result
|= (g_ascii_strncasecmp (encoding
, id
, strlen (id
)) == 0);
274 /* --------------------------------------------------------------------------------------------- */
/* Set up conv_displ / conv_input and cp_source / cp_display for the pair of
   codepage indices cpsource and cpdisplay.
   If either index is negative or both are equal, the short path is taken and
   cp_source is set to cp_display. Otherwise both ids are read from the codepages
   list and the upper half (128..255) of each table is filled through iconv.
   On iconv failure a newly allocated message from g_strdup_printf() is returned. */
277 init_translation_table (int cpsource
, int cpdisplay
)
282 /* Fill input <-> display tables */
284 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
)
/* no conversion case: all 256 entries are visited */
286 for (i
= 0; i
<= 255; ++i
)
290 cp_source
= cp_display
;
/* lower half (0..127) is handled separately from the iconv-driven upper half */
295 for (i
= 0; i
<= 127; ++i
)
300 cp_source
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpsource
))->id
;
301 cp_display
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpdisplay
))->id
;
303 /* display <- input table */
305 cd
= g_iconv_open (cp_display
, cp_source
);
306 if (cd
== INVALID_CONV
)
307 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source
, cp_display
);
309 for (i
= 128; i
<= 255; ++i
)
310 conv_displ
[i
] = translate_character (cd
, i
);
314 /* input <- display table */
316 cd
= g_iconv_open (cp_source
, cp_display
);
317 if (cd
== INVALID_CONV
)
318 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display
, cp_source
);
320 for (i
= 128; i
<= 255; ++i
)
323 ch
= translate_character (cd
, i
);
/* an untranslatable character maps to itself in the input direction */
324 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
332 /* --------------------------------------------------------------------------------------------- */
/* Translate str in place through the conv_displ table.
   The cast to unsigned char keeps the table index in 0..255 even where plain
   char is signed. */
335 convert_to_display (char *str
)
342 *str
= conv_displ
[(unsigned char) *str
];
347 /* --------------------------------------------------------------------------------------------- */
/* Convenience wrapper: calls str_nconvert_to_display() with len = -1. */
350 str_convert_to_display (char *str
)
352 return str_nconvert_to_display (str
, -1);
356 /* --------------------------------------------------------------------------------------------- */
/* Convert str (len passed through to str_nconvert()) for display and return the
   result as a new GString.
   An empty GString is returned on the early-exit path. If cp_display and
   cp_source are the same pointer, str is copied unchanged. Otherwise a
   converter is created with str_crt_conv_from (cp_source), used once and closed. */
359 str_nconvert_to_display (char *str
, int len
)
365 return g_string_new ("");
/* pointer comparison, not strcmp(): both are assigned by init_translation_table() */
367 if (cp_display
== cp_source
)
368 return g_string_new (str
);
370 conv
= str_crt_conv_from (cp_source
);
372 buff
= g_string_new ("");
373 str_nconvert (conv
, str
, len
, buff
);
374 str_close_conv (conv
);
378 /* --------------------------------------------------------------------------------------------- */
/* Translate str in place through the conv_input table.
   The cast to unsigned char keeps the table index in 0..255 even where plain
   char is signed. */
381 convert_from_input (char *str
)
388 *str
= conv_input
[(unsigned char) *str
];
393 /* --------------------------------------------------------------------------------------------- */
/* Convenience wrapper: calls str_nconvert_to_input() with len = -1. */
396 str_convert_to_input (char *str
)
398 return str_nconvert_to_input (str
, -1);
401 /* --------------------------------------------------------------------------------------------- */
/* Convert str (len passed through to str_nconvert()) to the source codepage and
   return the result as a new GString.
   An empty GString is returned on the early-exit path. If cp_display and
   cp_source are the same pointer, str is copied unchanged. Otherwise a
   converter is created with str_crt_conv_to (cp_source), used once and closed. */
404 str_nconvert_to_input (char *str
, int len
)
410 return g_string_new ("");
/* pointer comparison, not strcmp(): both are assigned by init_translation_table() */
412 if (cp_display
== cp_source
)
413 return g_string_new (str
);
415 conv
= str_crt_conv_to (cp_source
);
417 buff
= g_string_new ("");
418 str_nconvert (conv
, str
, len
, buff
);
419 str_close_conv (conv
);
423 /* --------------------------------------------------------------------------------------------- */
/* Translate the leading character of str into the codepage selected by
   mc_global.source_codepage.
   A converter is created per call with str_crt_conv_to() and closed again when
   it was valid. ch starts as '.', the value that remains when nothing else
   assigns it. */
426 convert_from_utf_to_current (const char *str
)
/* 6 + 1: room for a 6-byte sequence plus one extra byte */
428 unsigned char buf_ch
[6 + 1];
429 unsigned char ch
= '.';
436 cp_to
= get_codepage_id (mc_global
.source_codepage
);
437 conv
= str_crt_conv_to (cp_to
);
439 if (conv
!= INVALID_CONV
)
441 switch (str_translate_char (conv
, str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
451 str_close_conv (conv
);
458 /* --------------------------------------------------------------------------------------------- */
/* Translate the Unicode code point input_char using the caller-supplied
   converter conv.
   The code point is first encoded as UTF-8 into str, then passed through
   str_translate_char(). ch starts as '.', the value that remains when nothing
   else assigns it. The converter is not closed in the lines shown. */
461 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
463 unsigned char str
[6 + 1];
464 unsigned char buf_ch
[6 + 1];
465 unsigned char ch
= '.';
/* NOTE(review): g_unichar_to_utf8() does not NUL-terminate its output, and str is
   passed below with length -1; confirm that res is used to terminate str. */
469 res
= g_unichar_to_utf8 (input_char
, (char *) str
);
476 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
489 /* --------------------------------------------------------------------------------------------- */
/* Translate the single 8-bit character input_char using the caller-supplied
   converter conv, then decode the output with g_utf8_get_char_validated(). */
492 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
/* one input byte plus room for a terminator */
494 unsigned char str
[2];
495 unsigned char buf_ch
[6 + 1];
499 str
[0] = (unsigned char) input_char
;
502 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
505 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
523 /* --------------------------------------------------------------------------------------------- */
/* Variant of convert_from_8bit_to_utf_c() that builds its own converter from
   mc_global.source_codepage and closes it again when it was valid.
   NOTE(review): the converter is created with str_crt_conv_to() although the
   codepage variable is named cp_from; confirm the intended direction. */
526 convert_from_8bit_to_utf_c2 (const char input_char
)
/* one input byte plus room for a terminator */
528 unsigned char str
[2];
529 unsigned char buf_ch
[6 + 1];
535 str
[0] = (unsigned char) input_char
;
538 cp_from
= get_codepage_id (mc_global
.source_codepage
);
539 conv
= str_crt_conv_to (cp_from
);
541 if (conv
!= INVALID_CONV
)
543 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
546 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
561 str_close_conv (conv
);
567 /* --------------------------------------------------------------------------------------------- */
569 #endif /* HAVE_CHARSET */