1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
32 #include "strutil.h" /* utf-8 functions */
37 struct codepage_desc
*codepages
;
39 unsigned char conv_displ
[256];
40 unsigned char conv_input
[256];
42 static char *cp_display
= NULL
;
43 static char *cp_source
= NULL
;
47 load_codepages_list (void)
54 extern int display_codepage
;
55 char *default_codepage
= NULL
;
57 fname
= concat_dir_and_file (mc_home
, CHARSETS_INDEX
);
58 if (!(f
= fopen (fname
, "r"))) {
59 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
65 for (n_codepages
= 0; fgets (buf
, sizeof (buf
), f
);)
66 if (buf
[0] != '\n' && buf
[0] != '\0' && buf
[0] != '#')
70 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
72 for (n_codepages
= 0; fgets (buf
, sizeof buf
, f
);) {
73 /* split string into id and cpname */
75 int buflen
= strlen (buf
);
77 if (*p
== '\n' || *p
== '\0' || *p
== '#')
80 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
81 buf
[buflen
- 1] = '\0';
82 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
89 while (*p
== '\t' || *p
== ' ')
94 if (strcmp (buf
, "default") == 0) {
95 default_codepage
= g_strdup (p
);
99 codepages
[n_codepages
].id
= g_strdup (buf
);
100 codepages
[n_codepages
].name
= g_strdup (p
);
104 if (default_codepage
) {
105 display_codepage
= get_codepage_index (default_codepage
);
106 g_free (default_codepage
);
109 result
= n_codepages
;
116 free_codepages_list (void)
118 if (n_codepages
> 0) {
120 for (i
= 0; i
< n_codepages
; i
++) {
121 g_free (codepages
[i
].id
);
122 g_free (codepages
[i
].name
);
130 #define OTHER_8BIT "Other_8_bit"
133 get_codepage_id (int n
)
135 return (n
< 0) ? OTHER_8BIT
: codepages
[n
].id
;
139 get_codepage_index (const char *id
)
142 if (strcmp (id
, OTHER_8BIT
) == 0)
144 for (i
= 0; codepages
[i
].id
; ++i
)
145 if (strcmp (id
, codepages
[i
].id
) == 0)
151 translate_character (iconv_t cd
, char c
)
153 char outbuf
[4], *obuf
;
154 size_t ibuflen
, obuflen
, count
;
156 ICONV_CONST
char *ibuf
= &c
;
161 count
= iconv (cd
, &ibuf
, &ibuflen
, &obuf
, &obuflen
);
162 if (count
!= ((size_t) -1) && ibuflen
== 0)
171 * FIXME: This assumes that ASCII is always the first encoding
177 init_translation_table (int cpsource
, int cpdisplay
)
181 const char *cpsour
, *cpdisp
;
183 /* Fill input <-> display tables */
185 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
) {
186 for (i
= 0; i
<= 255; ++i
) {
193 for (i
= 0; i
<= 127; ++i
) {
198 cp_display
= cpsour
= codepages
[cpsource
].id
;
199 cp_source
= cpdisp
= codepages
[cpdisplay
].id
;
201 /* display <- input table */
203 cd
= iconv_open (cpdisp
, cpsour
);
204 if (cd
== (iconv_t
) - 1) {
205 g_snprintf (errbuf
, sizeof (errbuf
),
206 _("Cannot translate from %s to %s"), cpsour
, cpdisp
);
210 for (i
= 128; i
<= 255; ++i
)
211 conv_displ
[i
] = translate_character (cd
, i
);
215 /* input <- display table */
217 cd
= iconv_open (cpsour
, cpdisp
);
218 if (cd
== (iconv_t
) - 1) {
219 g_snprintf (errbuf
, sizeof (errbuf
),
220 _("Cannot translate from %s to %s"), cpdisp
, cpsour
);
224 for (i
= 128; i
<= 255; ++i
) {
226 ch
= translate_character (cd
, i
);
227 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
236 convert_to_display (char *str
)
242 *str
= conv_displ
[(unsigned char) *str
];
248 str_convert_to_display (char *str
)
256 if (cp_display
== cp_source
)
257 return g_string_new(str
);
259 conv
= str_crt_conv_from (cp_display
);
261 buff
= g_string_new("");
262 str_convert (conv
, str
, buff
);
267 convert_from_input (char *str
)
273 *str
= conv_input
[(unsigned char) *str
];
279 str_convert_from_input (char *str
)
287 if (cp_display
== cp_source
)
288 return g_string_new(str
);
290 conv
= str_crt_conv_to (cp_display
);
292 buff
= g_string_new("");
293 str_convert (conv
, str
, buff
);
298 convert_from_utf_to_current (const char *str
)
304 unsigned char buf_ch
[6 + 1];
305 unsigned char ch
= '.';
309 cp_to
= get_codepage_id ( source_codepage
);
310 conv
= str_crt_conv_to ( cp_to
);
312 if (conv
!= INVALID_CONV
) {
313 switch (str_translate_char (conv
, str
, -1, buf_ch
, sizeof(buf_ch
))) {
322 str_close_conv (conv
);
330 convert_from_utf_to_current_c (const int input_char
)
332 unsigned char str
[6 + 1];
333 unsigned char buf_ch
[6 + 1];
334 unsigned char ch
= '.';
336 char *cp_from
= NULL
;
338 GString
*translated_data
;
341 res
= g_unichar_to_utf8 (input_char
, str
);
347 cp_from
= get_codepage_id ( source_codepage
);
348 conv
= str_crt_conv_from (cp_from
);
350 if (conv
!= INVALID_CONV
) {
351 switch (str_translate_char (conv
, str
, sizeof(str
), buf_ch
, sizeof(buf_ch
))) {
361 str_close_conv (conv
);
367 #endif /* HAVE_CHARSET */