1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * \brief Source: Text conversion from one charset to another
32 #include "lib/global.h"
33 #include "lib/strutil.h" /* utf-8 functions */
34 #include "lib/fileloc.h"
35 #include "lib/charsets.h"
39 /*** global variables ****************************************************************************/
41 GPtrArray
*codepages
= NULL
;
43 unsigned char conv_displ
[256];
44 unsigned char conv_input
[256];
46 const char *cp_display
= NULL
;
47 const char *cp_source
= NULL
;
49 /*** file scope macro definitions ****************************************************************/
51 #define OTHER_8BIT "Other_8_bit"
54 * FIXME: This assumes that ASCII is always the first encoding
59 /*** file scope type declarations ****************************************************************/
61 /*** file scope variables ************************************************************************/
63 /*** file scope functions ************************************************************************/
64 /* --------------------------------------------------------------------------------------------- */
66 static codepage_desc
*
67 new_codepage_desc (const char *id
, const char *name
)
71 desc
= g_new (codepage_desc
, 1);
72 desc
->id
= g_strdup (id
);
73 desc
->name
= g_strdup (name
);
78 /* --------------------------------------------------------------------------------------------- */
81 free_codepage_desc (gpointer data
, gpointer user_data
)
83 codepage_desc
*desc
= (codepage_desc
*) data
;
91 /* --------------------------------------------------------------------------------------------- */
92 /* returns display codepage */
95 load_codepages_list_from_file (GPtrArray
** list
, const char *fname
)
100 char *default_codepage
= NULL
;
102 f
= fopen (fname
, "r");
106 for (i
= 0; fgets (buf
, sizeof buf
, f
) != NULL
;)
108 /* split string into id and cpname */
110 size_t buflen
= strlen (buf
);
112 if (*p
== '\n' || *p
== '\0' || *p
== '#')
115 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
116 buf
[buflen
- 1] = '\0';
117 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
127 if (strcmp (buf
, "default") == 0)
128 default_codepage
= g_strdup (p
);
131 const char *id
= buf
;
135 *list
= g_ptr_array_sized_new (16);
136 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
140 /* whether id is already present in list */
141 /* if yes, overwrite description */
142 for (i
= 0; i
< (*list
)->len
; i
++)
146 desc
= (codepage_desc
*) g_ptr_array_index (*list
, i
);
148 if (strcmp (id
, desc
->id
) == 0)
152 desc
->name
= g_strdup (p
);
158 if (i
== (*list
)->len
)
159 g_ptr_array_add (*list
, new_codepage_desc (id
, p
));
164 if (default_codepage
!= NULL
)
166 display_codepage
= get_codepage_index (default_codepage
);
167 g_free (default_codepage
);
174 /* --------------------------------------------------------------------------------------------- */
177 translate_character (GIConv cd
, char c
)
179 gchar
*tmp_buff
= NULL
;
180 gsize bytes_read
, bytes_written
= 0;
181 const char *ibuf
= &c
;
186 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
193 /* --------------------------------------------------------------------------------------------- */
194 /*** public functions ****************************************************************************/
195 /* --------------------------------------------------------------------------------------------- */
198 load_codepages_list (void)
202 /* 1: try to load /usr/share/mc/mc.charsets */
203 fname
= g_build_filename (mc_home_alt
, CHARSETS_LIST
, (char *) NULL
);
204 load_codepages_list_from_file (&codepages
, fname
);
207 /* 2: try to load /etc/mc/mc.charsets */
208 fname
= g_build_filename (mc_home
, CHARSETS_LIST
, (char *) NULL
);
209 load_codepages_list_from_file (&codepages
, fname
);
212 if (codepages
== NULL
)
214 /* files are not found, add default codepage */
215 fprintf (stderr
, "%s\n", _("Warning: cannot load codepages list"));
217 codepages
= g_ptr_array_new ();
218 g_ptr_array_add (codepages
, new_codepage_desc ("ASCII", _("7-bit ASCII")));
222 /* --------------------------------------------------------------------------------------------- */
225 free_codepages_list (void)
227 g_ptr_array_foreach (codepages
, free_codepage_desc
, NULL
);
228 g_ptr_array_free (codepages
, TRUE
);
231 /* --------------------------------------------------------------------------------------------- */
234 get_codepage_id (const int n
)
236 return (n
< 0) ? OTHER_8BIT
: ((codepage_desc
*) g_ptr_array_index (codepages
, n
))->id
;
239 /* --------------------------------------------------------------------------------------------- */
242 get_codepage_index (const char *id
)
245 if (strcmp (id
, OTHER_8BIT
) == 0)
247 if (codepages
== NULL
)
249 for (i
= 0; i
< codepages
->len
; i
++)
250 if (strcmp (id
, ((codepage_desc
*) g_ptr_array_index (codepages
, i
))->id
) == 0)
255 /* --------------------------------------------------------------------------------------------- */
256 /** Check if specified encoding can be used in mc.
257 * @param encoding name of encoding
258 * @returns TRUE if encoding is supported by mc, FALSE otherwise
262 is_supported_encoding (const char *encoding
)
264 gboolean result
= FALSE
;
267 for (t
= 0; t
< codepages
->len
; t
++)
269 const char *id
= ((codepage_desc
*) g_ptr_array_index (codepages
, t
))->id
;
270 result
|= (g_ascii_strncasecmp (encoding
, id
, strlen (id
)) == 0);
276 /* --------------------------------------------------------------------------------------------- */
279 init_translation_table (int cpsource
, int cpdisplay
)
284 /* Fill input <-> display tables */
286 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
)
288 for (i
= 0; i
<= 255; ++i
)
292 cp_source
= cp_display
;
297 for (i
= 0; i
<= 127; ++i
)
302 cp_source
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpsource
))->id
;
303 cp_display
= ((codepage_desc
*) g_ptr_array_index (codepages
, cpdisplay
))->id
;
305 /* display <- input table */
307 cd
= g_iconv_open (cp_display
, cp_source
);
308 if (cd
== INVALID_CONV
)
309 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_source
, cp_display
);
311 for (i
= 128; i
<= 255; ++i
)
312 conv_displ
[i
] = translate_character (cd
, i
);
316 /* input <- display table */
318 cd
= g_iconv_open (cp_source
, cp_display
);
319 if (cd
== INVALID_CONV
)
320 return g_strdup_printf (_("Cannot translate from %s to %s"), cp_display
, cp_source
);
322 for (i
= 128; i
<= 255; ++i
)
325 ch
= translate_character (cd
, i
);
326 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
334 /* --------------------------------------------------------------------------------------------- */
337 convert_to_display (char *str
)
344 *str
= conv_displ
[(unsigned char) *str
];
349 /* --------------------------------------------------------------------------------------------- */
352 str_convert_to_display (char *str
)
354 return str_nconvert_to_display (str
, -1);
358 /* --------------------------------------------------------------------------------------------- */
361 str_nconvert_to_display (char *str
, int len
)
367 return g_string_new ("");
369 if (cp_display
== cp_source
)
370 return g_string_new (str
);
372 conv
= str_crt_conv_from (cp_source
);
374 buff
= g_string_new ("");
375 str_nconvert (conv
, str
, len
, buff
);
376 str_close_conv (conv
);
380 /* --------------------------------------------------------------------------------------------- */
383 convert_from_input (char *str
)
390 *str
= conv_input
[(unsigned char) *str
];
395 /* --------------------------------------------------------------------------------------------- */
398 str_convert_to_input (char *str
)
400 return str_nconvert_to_input (str
, -1);
403 /* --------------------------------------------------------------------------------------------- */
406 str_nconvert_to_input (char *str
, int len
)
412 return g_string_new ("");
414 if (cp_display
== cp_source
)
415 return g_string_new (str
);
417 conv
= str_crt_conv_to (cp_source
);
419 buff
= g_string_new ("");
420 str_nconvert (conv
, str
, len
, buff
);
421 str_close_conv (conv
);
425 /* --------------------------------------------------------------------------------------------- */
428 convert_from_utf_to_current (const char *str
)
430 unsigned char buf_ch
[6 + 1];
431 unsigned char ch
= '.';
438 cp_to
= get_codepage_id (source_codepage
);
439 conv
= str_crt_conv_to (cp_to
);
441 if (conv
!= INVALID_CONV
)
443 switch (str_translate_char (conv
, str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
453 str_close_conv (conv
);
460 /* --------------------------------------------------------------------------------------------- */
463 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
465 unsigned char str
[6 + 1];
466 unsigned char buf_ch
[6 + 1];
467 unsigned char ch
= '.';
471 res
= g_unichar_to_utf8 (input_char
, (char *) str
);
478 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
491 /* --------------------------------------------------------------------------------------------- */
494 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
496 unsigned char str
[2];
497 unsigned char buf_ch
[6 + 1];
501 str
[0] = (unsigned char) input_char
;
504 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
507 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
525 /* --------------------------------------------------------------------------------------------- */
528 convert_from_8bit_to_utf_c2 (const char input_char
)
530 unsigned char str
[2];
531 unsigned char buf_ch
[6 + 1];
537 str
[0] = (unsigned char) input_char
;
540 cp_from
= get_codepage_id (source_codepage
);
541 conv
= str_crt_conv_to (cp_from
);
543 if (conv
!= INVALID_CONV
)
545 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof (buf_ch
)))
548 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
563 str_close_conv (conv
);
569 /* --------------------------------------------------------------------------------------------- */
571 #endif /* HAVE_CHARSET */