2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "claws-features.h"
28 #include <glib/gi18n.h>
40 #include "quoted-printable.h"
42 #include "prefs_common.h"
44 /* For unknown reasons the iconv.m4 macro undefs the ICONV_CONST macro if no
45 const is needed. This would break the code below, so we define it. */
/*
 * Replacement byte (0x5f, ASCII '_') stored in place of bytes that are
 * not ASCII or that iconv() rejects.
 *
 * The definition must not end in a semicolon: with one, the macro can only
 * appear as the last token of a statement and breaks inside any larger
 * expression, e.g. (SUBST_CHAR) or f(SUBST_CHAR).
 */
#define SUBST_CHAR 0x5f
/*
 * Byte classifiers used by the EUC converters in this file.
 * Only the low 8 bits of the argument are examined, so plain (possibly
 * signed) chars may be passed.  The argument can be evaluated more than
 * once: do not pass expressions with side effects.
 */

/* low byte lies in 0xa1..0xfe */
#define iseuckanji(c)	(!(((c) & 0xff) < 0xa1 || ((c) & 0xff) > 0xfe))

/* low byte is exactly 0x8e */
#define iseuchwkana1(c)	(0x8e == ((c) & 0xff))

/* low byte lies in 0xa1..0xdf */
#define iseuchwkana2(c)	(!(((c) & 0xff) < 0xa1 || ((c) & 0xff) > 0xdf))
68 (((c) & 0xff) == 0x8f)
/*
 * Byte classifiers used by the Shift_JIS handling in this file.
 * Only the low 8 bits of the argument are examined.  The argument can be
 * evaluated more than once: do not pass expressions with side effects.
 */

/* low byte lies in 0x81..0x9f or 0xe0..0xfc */
#define issjiskanji1(c)						\
	(!(((c) & 0xff) < 0x81 || ((c) & 0xff) > 0x9f) ||	\
	 !(((c) & 0xff) < 0xe0 || ((c) & 0xff) > 0xfc))

/* low byte lies in 0x40..0x7e or 0x80..0xfc */
#define issjiskanji2(c)						\
	(!(((c) & 0xff) < 0x40 || ((c) & 0xff) > 0x7e) ||	\
	 !(((c) & 0xff) < 0x80 || ((c) & 0xff) > 0xfc))

/* low byte lies in 0xa1..0xdf */
#define issjishwkana(c)	(!(((c) & 0xff) < 0xa1 || ((c) & 0xff) > 0xdf))
79 if (state != JIS_KANJI) { \
87 if (state != JIS_ASCII) { \
95 if (state != JIS_HWKANA) { \
103 if (state != JIS_AUXKANJI) { \
108 state = JIS_AUXKANJI; \
111 static CodeConvFunc
conv_get_code_conv_func (const gchar
*src_charset_str
,
112 const gchar
*dest_charset_str
);
114 static gchar
*conv_iconv_strdup_with_cd (const gchar
*inbuf
,
117 static gchar
*conv_iconv_strdup (const gchar
*inbuf
,
118 const gchar
*src_code
,
119 const gchar
*dest_code
);
121 static CharSet
conv_get_locale_charset (void);
122 static CharSet
conv_get_outgoing_charset (void);
123 static CharSet
conv_guess_ja_encoding(const gchar
*str
);
124 static gboolean
conv_is_ja_locale (void);
126 static gint
conv_jistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
127 static gint
conv_euctojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
128 static gint
conv_sjistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
130 static gint
conv_jistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
131 static gint
conv_sjistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
132 static gint
conv_euctoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
133 static gint
conv_anytoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
135 static gint
conv_utf8toeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
136 static gint
conv_utf8tojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
138 static void conv_unreadable_8bit(gchar
*str
);
140 static gint
conv_jistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
141 static gint
conv_sjistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
142 static gint
conv_euctodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
144 static gint
conv_anytodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
145 static gint
conv_ustodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
146 static gint
conv_noconv(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
148 static gboolean strict_mode
= FALSE
;
150 void codeconv_set_strict(gboolean mode
)
155 static gint
conv_jistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
157 const guchar
*in
= inbuf
;
159 JISState state
= JIS_ASCII
;
162 * Loop outputs up to 3 bytes in each pass (aux kanji) and we
163 * need 1 byte to terminate the output
165 while (*in
!= '\0' && (out
- outbuf
) < outlen
- 4) {
169 if (*(in
+ 1) == '@' || *(in
+ 1) == 'B') {
172 } else if (*(in
+ 1) == '(' &&
174 state
= JIS_AUXKANJI
;
177 /* unknown escape sequence */
180 } else if (*in
== '(') {
181 if (*(in
+ 1) == 'B' || *(in
+ 1) == 'J') {
184 } else if (*(in
+ 1) == 'I') {
188 /* unknown escape sequence */
192 /* unknown escape sequence */
195 } else if (*in
== 0x0e) {
198 } else if (*in
== 0x0f) {
207 *out
++ = *in
++ | 0x80;
208 if (*in
== '\0') break;
209 *out
++ = *in
++ | 0x80;
213 *out
++ = *in
++ | 0x80;
217 *out
++ = *in
++ | 0x80;
218 if (*in
== '\0') break;
219 *out
++ = *in
++ | 0x80;
229 #define JIS_HWDAKUTEN 0x5e
230 #define JIS_HWHANDAKUTEN 0x5f
232 static gint
conv_jis_hantozen(guchar
*outbuf
, guchar jis_code
, guchar sound_sym
)
234 static guint16 h2z_tbl
[] = {
236 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
237 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
239 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
240 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
242 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
243 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
245 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
246 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
249 static guint16 dakuten_tbl
[] = {
251 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
252 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
254 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
255 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
258 static guint16 handakuten_tbl
[] = {
260 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
268 if (jis_code
< 0x21 || jis_code
> 0x5f)
271 if (sound_sym
== JIS_HWDAKUTEN
&&
272 jis_code
>= 0x36 && jis_code
<= 0x4e) {
273 out_code
= dakuten_tbl
[jis_code
- 0x30];
275 *outbuf
= out_code
>> 8;
276 *(outbuf
+ 1) = out_code
& 0xff;
281 if (sound_sym
== JIS_HWHANDAKUTEN
&&
282 jis_code
>= 0x4a && jis_code
<= 0x4e) {
283 out_code
= handakuten_tbl
[jis_code
- 0x4a];
284 *outbuf
= out_code
>> 8;
285 *(outbuf
+ 1) = out_code
& 0xff;
289 out_code
= h2z_tbl
[jis_code
- 0x20];
290 *outbuf
= out_code
>> 8;
291 *(outbuf
+ 1) = out_code
& 0xff;
295 static gint
conv_euctojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
297 const guchar
*in
= inbuf
;
299 JISState state
= JIS_ASCII
;
302 * Loop outputs up to 6 bytes in each pass (aux shift + aux
303 * kanji) and we need up to 4 bytes to terminate the output
304 * (ASCII shift + null)
306 while (*in
!= '\0' && (out
- outbuf
) < outlen
- 10) {
310 } else if (iseuckanji(*in
)) {
311 if (iseuckanji(*(in
+ 1))) {
313 *out
++ = *in
++ & 0x7f;
314 *out
++ = *in
++ & 0x7f;
319 if (*in
!= '\0' && !IS_ASCII(*in
)) {
324 } else if (iseuchwkana1(*in
)) {
325 if (iseuchwkana2(*(in
+ 1))) {
326 if (prefs_common
.allow_jisx0201_kana
) {
329 *out
++ = *in
++ & 0x7f;
334 if (iseuchwkana1(*(in
+ 2)) &&
335 iseuchwkana2(*(in
+ 3)))
336 len
= conv_jis_hantozen
338 *(in
+ 1), *(in
+ 3));
340 len
= conv_jis_hantozen
355 if (*in
!= '\0' && !IS_ASCII(*in
)) {
360 } else if (iseucaux(*in
)) {
362 if (iseuckanji(*in
) && iseuckanji(*(in
+ 1))) {
364 *out
++ = *in
++ & 0x7f;
365 *out
++ = *in
++ & 0x7f;
368 if (*in
!= '\0' && !IS_ASCII(*in
)) {
371 if (*in
!= '\0' && !IS_ASCII(*in
)) {
389 static gint
conv_sjistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
391 const guchar
*in
= inbuf
;
395 * Loop outputs up to 2 bytes in each pass and we need 1 byte
396 * to terminate the output
398 while (*in
!= '\0' && (out
- outbuf
) < outlen
- 3) {
401 } else if (issjiskanji1(*in
)) {
402 if (issjiskanji2(*(in
+ 1))) {
404 guchar out2
= *(in
+ 1);
407 row
= out1
< 0xa0 ? 0x70 : 0xb0;
409 out1
= (out1
- row
) * 2 - 1;
410 out2
-= out2
> 0x7f ? 0x20 : 0x1f;
412 out1
= (out1
- row
) * 2;
416 *out
++ = out1
| 0x80;
417 *out
++ = out2
| 0x80;
422 if (*in
!= '\0' && !IS_ASCII(*in
)) {
427 } else if (issjishwkana(*in
)) {
440 static gint
conv_jistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
444 Xalloca(eucstr
, outlen
, return -1);
446 if (conv_jistoeuc(eucstr
, outlen
, inbuf
) <0)
448 if (conv_euctoutf8(outbuf
, outlen
, eucstr
) < 0)
453 static gint
conv_sjistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
457 tmpstr
= conv_iconv_strdup(inbuf
, CS_SHIFT_JIS
, CS_UTF_8
);
459 strncpy2(outbuf
, tmpstr
, outlen
);
463 strncpy2(outbuf
, inbuf
, outlen
);
468 static gint
conv_euctoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
470 static iconv_t cd
= (iconv_t
)-1;
471 static gboolean iconv_ok
= TRUE
;
474 if (cd
== (iconv_t
)-1) {
476 strncpy2(outbuf
, inbuf
, outlen
);
479 cd
= iconv_open(CS_UTF_8
, CS_EUC_JP_MS
);
480 if (cd
== (iconv_t
)-1) {
481 cd
= iconv_open(CS_UTF_8
, CS_EUC_JP
);
482 if (cd
== (iconv_t
)-1) {
483 g_warning("conv_euctoutf8(): %s",
486 strncpy2(outbuf
, inbuf
, outlen
);
492 tmpstr
= conv_iconv_strdup_with_cd(inbuf
, cd
);
494 strncpy2(outbuf
, tmpstr
, outlen
);
498 strncpy2(outbuf
, inbuf
, outlen
);
503 static gint
conv_anytoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
506 switch (conv_guess_ja_encoding(inbuf
)) {
508 r
= conv_jistoutf8(outbuf
, outlen
, inbuf
);
511 r
= conv_sjistoutf8(outbuf
, outlen
, inbuf
);
514 r
= conv_euctoutf8(outbuf
, outlen
, inbuf
);
518 strncpy2(outbuf
, inbuf
, outlen
);
525 static gint
conv_utf8toeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
527 static iconv_t cd
= (iconv_t
)-1;
528 static gboolean iconv_ok
= TRUE
;
531 if (cd
== (iconv_t
)-1) {
533 strncpy2(outbuf
, inbuf
, outlen
);
536 cd
= iconv_open(CS_EUC_JP_MS
, CS_UTF_8
);
537 if (cd
== (iconv_t
)-1) {
538 cd
= iconv_open(CS_EUC_JP
, CS_UTF_8
);
539 if (cd
== (iconv_t
)-1) {
540 g_warning("conv_utf8toeuc(): %s",
543 strncpy2(outbuf
, inbuf
, outlen
);
549 tmpstr
= conv_iconv_strdup_with_cd(inbuf
, cd
);
551 strncpy2(outbuf
, tmpstr
, outlen
);
555 strncpy2(outbuf
, inbuf
, outlen
);
560 static gint
conv_utf8tojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
564 Xalloca(eucstr
, outlen
, return -1);
566 if (conv_utf8toeuc(eucstr
, outlen
, inbuf
) < 0)
568 if (conv_euctojis(outbuf
, outlen
, eucstr
) < 0)
574 static void conv_unreadable_8bit(gchar
*str
)
576 register guchar
*p
= str
;
579 /* convert CR+LF -> LF */
580 if (*p
== '\r' && *(p
+ 1) == '\n')
581 memmove(p
, p
+ 1, strlen(p
));
582 else if (!IS_ASCII(*p
)) *p
= SUBST_CHAR
;
587 static CharSet
conv_guess_ja_encoding(const gchar
*str
)
589 const guchar
*p
= str
;
590 CharSet guessed
= C_US_ASCII
;
593 if (*p
== ESC
&& (*(p
+ 1) == '$' || *(p
+ 1) == '(')) {
594 if (guessed
== C_US_ASCII
)
595 return C_ISO_2022_JP
;
597 } else if (IS_ASCII(*p
)) {
599 } else if (iseuckanji(*p
) && iseuckanji(*(p
+ 1))) {
600 if (*p
>= 0xfd && *p
<= 0xfe)
602 else if (guessed
== C_SHIFT_JIS
) {
603 if ((issjiskanji1(*p
) &&
604 issjiskanji2(*(p
+ 1))) ||
606 guessed
= C_SHIFT_JIS
;
612 } else if (issjiskanji1(*p
) && issjiskanji2(*(p
+ 1))) {
613 if (iseuchwkana1(*p
) && iseuchwkana2(*(p
+ 1)))
614 guessed
= C_SHIFT_JIS
;
618 } else if (issjishwkana(*p
)) {
619 guessed
= C_SHIFT_JIS
;
629 static gint
conv_jistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
631 return conv_jistoutf8(outbuf
, outlen
, inbuf
);
634 static gint
conv_sjistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
636 return conv_sjistoutf8(outbuf
, outlen
, inbuf
);
639 static gint
conv_euctodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
641 return conv_euctoutf8(outbuf
, outlen
, inbuf
);
644 void conv_utf8todisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
646 if (g_utf8_validate(inbuf
, -1, NULL
) == TRUE
)
647 strncpy2(outbuf
, inbuf
, outlen
);
649 conv_ustodisp(outbuf
, outlen
, inbuf
);
652 static gint
conv_anytodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
655 if (conv_anytoutf8(outbuf
, outlen
, inbuf
) < 0)
657 if (g_utf8_validate(outbuf
, -1, NULL
) != TRUE
)
658 conv_unreadable_8bit(outbuf
);
662 static gint
conv_ustodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
664 strncpy2(outbuf
, inbuf
, outlen
);
665 conv_unreadable_8bit(outbuf
);
670 void conv_localetodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
674 codeconv_set_strict(TRUE
);
675 tmpstr
= conv_iconv_strdup(inbuf
, conv_get_locale_charset_str(),
677 codeconv_set_strict(FALSE
);
678 if (tmpstr
&& g_utf8_validate(tmpstr
, -1, NULL
)) {
679 strncpy2(outbuf
, tmpstr
, outlen
);
682 } else if (tmpstr
&& !g_utf8_validate(tmpstr
, -1, NULL
)) {
684 codeconv_set_strict(TRUE
);
685 tmpstr
= conv_iconv_strdup(inbuf
,
686 conv_get_locale_charset_str_no_utf8(),
688 codeconv_set_strict(FALSE
);
690 if (tmpstr
&& g_utf8_validate(tmpstr
, -1, NULL
)) {
691 strncpy2(outbuf
, tmpstr
, outlen
);
696 conv_utf8todisp(outbuf
, outlen
, inbuf
);
700 static gint
conv_noconv(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
702 strncpy2(outbuf
, inbuf
, outlen
);
707 conv_get_fallback_for_private_encoding(const gchar
*encoding
)
709 if (encoding
&& (encoding
[0] == 'X' || encoding
[0] == 'x') &&
710 encoding
[1] == '-') {
711 if (!g_ascii_strcasecmp(encoding
, CS_X_MACCYR
))
713 if (!g_ascii_strcasecmp(encoding
, CS_X_GBK
))
720 CodeConverter
*conv_code_converter_new(const gchar
*src_charset
)
724 src_charset
= conv_get_fallback_for_private_encoding(src_charset
);
726 conv
= g_new0(CodeConverter
, 1);
727 conv
->code_conv_func
= conv_get_code_conv_func(src_charset
, NULL
);
728 conv
->charset_str
= g_strdup(src_charset
);
729 conv
->charset
= conv_get_charset_from_str(src_charset
);
734 void conv_code_converter_destroy(CodeConverter
*conv
)
736 g_free(conv
->charset_str
);
740 gint
conv_convert(CodeConverter
*conv
, gchar
*outbuf
, gint outlen
,
743 if (conv
->code_conv_func
!= conv_noconv
)
744 return conv
->code_conv_func(outbuf
, outlen
, inbuf
);
748 str
= conv_iconv_strdup(inbuf
, conv
->charset_str
, NULL
);
752 strncpy2(outbuf
, str
, outlen
);
760 gchar
*conv_codeset_strdup(const gchar
*inbuf
,
761 const gchar
*src_code
, const gchar
*dest_code
)
765 CodeConvFunc conv_func
;
767 if (!strcmp2(src_code
, dest_code
)) {
768 CharSet dest_charset
= conv_get_charset_from_str(dest_code
);
769 if (strict_mode
&& dest_charset
== C_UTF_8
) {
770 /* ensure valid UTF-8 if target is UTF-8 */
771 if (!g_utf8_validate(inbuf
, -1, NULL
)) {
775 /* otherwise, try for a lucky day */
776 return g_strdup(inbuf
);
779 src_code
= conv_get_fallback_for_private_encoding(src_code
);
780 conv_func
= conv_get_code_conv_func(src_code
, dest_code
);
781 if (conv_func
== conv_ustodisp
&& strict_mode
&& !is_ascii_str(inbuf
))
784 if (conv_func
!= conv_noconv
) {
785 len
= (strlen(inbuf
) + 1) * 3;
787 if (!buf
) return NULL
;
789 if (conv_func(buf
, len
, inbuf
) == 0 || !strict_mode
)
790 return g_realloc(buf
, strlen(buf
) + 1);
797 return conv_iconv_strdup(inbuf
, src_code
, dest_code
);
800 static CodeConvFunc
conv_get_code_conv_func(const gchar
*src_charset_str
,
801 const gchar
*dest_charset_str
)
803 CodeConvFunc code_conv
= conv_noconv
;
805 CharSet dest_charset
;
807 if (!src_charset_str
)
808 src_charset
= conv_get_locale_charset();
810 src_charset
= conv_get_charset_from_str(src_charset_str
);
812 /* auto detection mode */
813 if (!src_charset_str
&& !dest_charset_str
) {
814 if (conv_is_ja_locale())
815 return conv_anytodisp
;
820 dest_charset
= conv_get_charset_from_str(dest_charset_str
);
822 if (dest_charset
== C_US_ASCII
)
823 return conv_ustodisp
;
825 switch (src_charset
) {
843 case C_ISO_2022_JP_2
:
844 case C_ISO_2022_JP_3
:
845 if (dest_charset
== C_AUTO
)
846 code_conv
= conv_jistodisp
;
847 else if (dest_charset
== C_EUC_JP
)
848 code_conv
= conv_jistoeuc
;
849 else if (dest_charset
== C_UTF_8
)
850 code_conv
= conv_jistoutf8
;
853 if (dest_charset
== C_AUTO
)
854 code_conv
= conv_sjistodisp
;
855 else if (dest_charset
== C_EUC_JP
)
856 code_conv
= conv_sjistoeuc
;
857 else if (dest_charset
== C_UTF_8
)
858 code_conv
= conv_sjistoutf8
;
861 if (dest_charset
== C_AUTO
)
862 code_conv
= conv_euctodisp
;
863 else if (dest_charset
== C_ISO_2022_JP
||
864 dest_charset
== C_ISO_2022_JP_2
||
865 dest_charset
== C_ISO_2022_JP_3
)
866 code_conv
= conv_euctojis
;
867 else if (dest_charset
== C_UTF_8
)
868 code_conv
= conv_euctoutf8
;
871 if (dest_charset
== C_EUC_JP
)
872 code_conv
= conv_utf8toeuc
;
873 else if (dest_charset
== C_ISO_2022_JP
||
874 dest_charset
== C_ISO_2022_JP_2
||
875 dest_charset
== C_ISO_2022_JP_3
)
876 code_conv
= conv_utf8tojis
;
885 static gchar
*conv_iconv_strdup(const gchar
*inbuf
,
886 const gchar
*src_code
, const gchar
*dest_code
)
891 if (!src_code
&& !dest_code
&&
892 g_utf8_validate(inbuf
, -1, NULL
))
893 return g_strdup(inbuf
);
896 src_code
= conv_get_outgoing_charset_str();
898 dest_code
= CS_INTERNAL
;
900 /* don't convert if src and dest codeset are identical */
901 if (!strcasecmp(src_code
, dest_code
))
902 return g_strdup(inbuf
);
904 /* don't convert if src codeset is US-ASCII */
905 if (!strcasecmp(src_code
, CS_US_ASCII
))
906 return g_strdup(inbuf
);
908 /* don't convert if dest codeset is US-ASCII */
909 if (!strcasecmp(dest_code
, CS_US_ASCII
))
910 return g_strdup(inbuf
);
912 cd
= iconv_open(dest_code
, src_code
);
913 if (cd
== (iconv_t
)-1)
916 outbuf
= conv_iconv_strdup_with_cd(inbuf
, cd
);
923 gchar
*conv_iconv_strdup_with_cd(const gchar
*inbuf
, iconv_t cd
)
925 const gchar
*inbuf_p
;
936 in_size
= strlen(inbuf
);
938 out_size
= (in_size
+ 1) * 2;
939 outbuf
= g_malloc(out_size
);
943 #define EXPAND_BUF() \
945 len = outbuf_p - outbuf; \
947 outbuf = g_realloc(outbuf, out_size); \
948 outbuf_p = outbuf + len; \
949 out_left = out_size - len; \
952 while ((n_conv
= iconv(cd
, (ICONV_CONST gchar
**)&inbuf_p
, &in_left
,
953 &outbuf_p
, &out_left
)) == (size_t)-1) {
954 if (EILSEQ
== errno
) {
959 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
965 *outbuf_p
++ = SUBST_CHAR
;
967 } else if (EINVAL
== errno
) {
969 } else if (E2BIG
== errno
) {
972 g_warning("conv_iconv_strdup(): %s",
978 while ((n_conv
= iconv(cd
, NULL
, NULL
, &outbuf_p
, &out_left
)) ==
980 if (E2BIG
== errno
) {
983 g_warning("conv_iconv_strdup(): %s",
991 len
= outbuf_p
- outbuf
;
992 outbuf
= g_realloc(outbuf
, len
+ 1);
998 static const struct {
1002 {C_US_ASCII
, CS_US_ASCII
},
1003 {C_US_ASCII
, CS_ANSI_X3_4_1968
},
1004 {C_UTF_8
, CS_UTF_8
},
1005 {C_UTF_7
, CS_UTF_7
},
1006 {C_ISO_8859_1
, CS_ISO_8859_1
},
1007 {C_ISO_8859_2
, CS_ISO_8859_2
},
1008 {C_ISO_8859_3
, CS_ISO_8859_3
},
1009 {C_ISO_8859_4
, CS_ISO_8859_4
},
1010 {C_ISO_8859_5
, CS_ISO_8859_5
},
1011 {C_ISO_8859_6
, CS_ISO_8859_6
},
1012 {C_ISO_8859_7
, CS_ISO_8859_7
},
1013 {C_ISO_8859_8
, CS_ISO_8859_8
},
1014 {C_ISO_8859_9
, CS_ISO_8859_9
},
1015 {C_ISO_8859_10
, CS_ISO_8859_10
},
1016 {C_ISO_8859_11
, CS_ISO_8859_11
},
1017 {C_ISO_8859_13
, CS_ISO_8859_13
},
1018 {C_ISO_8859_14
, CS_ISO_8859_14
},
1019 {C_ISO_8859_15
, CS_ISO_8859_15
},
1020 {C_BALTIC
, CS_BALTIC
},
1021 {C_CP1250
, CS_CP1250
},
1022 {C_CP1251
, CS_CP1251
},
1023 {C_CP1252
, CS_CP1252
},
1024 {C_CP1253
, CS_CP1253
},
1025 {C_CP1254
, CS_CP1254
},
1026 {C_CP1255
, CS_CP1255
},
1027 {C_CP1256
, CS_CP1256
},
1028 {C_CP1257
, CS_CP1257
},
1029 {C_CP1258
, CS_CP1258
},
1030 {C_WINDOWS_1250
, CS_WINDOWS_1250
},
1031 {C_WINDOWS_1251
, CS_WINDOWS_1251
},
1032 {C_WINDOWS_1252
, CS_WINDOWS_1252
},
1033 {C_WINDOWS_1253
, CS_WINDOWS_1253
},
1034 {C_WINDOWS_1254
, CS_WINDOWS_1254
},
1035 {C_WINDOWS_1255
, CS_WINDOWS_1255
},
1036 {C_WINDOWS_1256
, CS_WINDOWS_1256
},
1037 {C_WINDOWS_1257
, CS_WINDOWS_1257
},
1038 {C_WINDOWS_1258
, CS_WINDOWS_1258
},
1039 {C_KOI8_R
, CS_KOI8_R
},
1040 {C_MACCYR
, CS_MACCYR
},
1041 {C_KOI8_T
, CS_KOI8_T
},
1042 {C_KOI8_U
, CS_KOI8_U
},
1043 {C_ISO_2022_JP
, CS_ISO_2022_JP
},
1044 {C_ISO_2022_JP_2
, CS_ISO_2022_JP_2
},
1045 {C_ISO_2022_JP_3
, CS_ISO_2022_JP_3
},
1046 {C_EUC_JP
, CS_EUC_JP
},
1047 {C_EUC_JP
, CS_EUCJP
},
1048 {C_EUC_JP_MS
, CS_EUC_JP_MS
},
1049 {C_SHIFT_JIS
, CS_SHIFT_JIS
},
1050 {C_SHIFT_JIS
, CS_SHIFT__JIS
},
1051 {C_SHIFT_JIS
, CS_SJIS
},
1052 {C_ISO_2022_KR
, CS_ISO_2022_KR
},
1053 {C_EUC_KR
, CS_EUC_KR
},
1054 {C_ISO_2022_CN
, CS_ISO_2022_CN
},
1055 {C_EUC_CN
, CS_EUC_CN
},
1056 {C_GB18030
, CS_GB18030
},
1057 {C_GB2312
, CS_GB2312
},
1059 {C_EUC_TW
, CS_EUC_TW
},
1061 {C_BIG5_HKSCS
, CS_BIG5_HKSCS
},
1062 {C_TIS_620
, CS_TIS_620
},
1063 {C_WINDOWS_874
, CS_WINDOWS_874
},
1064 {C_GEORGIAN_PS
, CS_GEORGIAN_PS
},
1065 {C_TCVN5712_1
, CS_TCVN5712_1
},
1068 static const struct {
1069 gchar
*const locale
;
1071 CharSet out_charset
;
1072 } locale_table
[] = {
1073 {"ja_JP.eucJP" , C_EUC_JP
, C_ISO_2022_JP
},
1074 {"ja_JP.EUC-JP" , C_EUC_JP
, C_ISO_2022_JP
},
1075 {"ja_JP.EUC" , C_EUC_JP
, C_ISO_2022_JP
},
1076 {"ja_JP.ujis" , C_EUC_JP
, C_ISO_2022_JP
},
1077 {"ja_JP.SJIS" , C_SHIFT_JIS
, C_ISO_2022_JP
},
1078 {"ja_JP.JIS" , C_ISO_2022_JP
, C_ISO_2022_JP
},
1080 {"ja_JP" , C_SHIFT_JIS
, C_ISO_2022_JP
},
1082 {"ja_JP" , C_EUC_JP
, C_ISO_2022_JP
},
1084 {"ko_KR.EUC-KR" , C_EUC_KR
, C_EUC_KR
},
1085 {"ko_KR" , C_EUC_KR
, C_EUC_KR
},
1086 {"zh_CN.GB18030" , C_GB18030
, C_GB18030
},
1087 {"zh_CN.GB2312" , C_GB2312
, C_GB2312
},
1088 {"zh_CN.GBK" , C_GBK
, C_GBK
},
1089 {"zh_CN" , C_GB18030
, C_GB18030
},
1090 {"zh_HK" , C_BIG5_HKSCS
, C_BIG5_HKSCS
},
1091 {"zh_TW.eucTW" , C_EUC_TW
, C_BIG5
},
1092 {"zh_TW.EUC-TW" , C_EUC_TW
, C_BIG5
},
1093 {"zh_TW.Big5" , C_BIG5
, C_BIG5
},
1094 {"zh_TW" , C_BIG5
, C_BIG5
},
1096 {"ru_RU.KOI8-R" , C_KOI8_R
, C_KOI8_R
},
1097 {"ru_RU.KOI8R" , C_KOI8_R
, C_KOI8_R
},
1098 {"ru_RU.CP1251" , C_WINDOWS_1251
, C_KOI8_R
},
1100 {"ru_RU" , C_WINDOWS_1251
, C_KOI8_R
},
1102 {"ru_RU" , C_ISO_8859_5
, C_KOI8_R
},
1104 {"tg_TJ" , C_KOI8_T
, C_KOI8_T
},
1105 {"ru_UA" , C_KOI8_U
, C_KOI8_U
},
1106 {"uk_UA.CP1251" , C_WINDOWS_1251
, C_KOI8_U
},
1107 {"uk_UA" , C_KOI8_U
, C_KOI8_U
},
1109 {"be_BY" , C_WINDOWS_1251
, C_WINDOWS_1251
},
1110 {"bg_BG" , C_WINDOWS_1251
, C_WINDOWS_1251
},
1112 {"yi_US" , C_WINDOWS_1255
, C_WINDOWS_1255
},
1114 {"af_ZA" , C_ISO_8859_1
, C_ISO_8859_1
},
1115 {"br_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1116 {"ca_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1117 {"da_DK" , C_ISO_8859_1
, C_ISO_8859_1
},
1118 {"de_AT" , C_ISO_8859_1
, C_ISO_8859_1
},
1119 {"de_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1120 {"de_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1121 {"de_DE" , C_ISO_8859_1
, C_ISO_8859_1
},
1122 {"de_LU" , C_ISO_8859_1
, C_ISO_8859_1
},
1123 {"en_AU" , C_ISO_8859_1
, C_ISO_8859_1
},
1124 {"en_BW" , C_ISO_8859_1
, C_ISO_8859_1
},
1125 {"en_CA" , C_ISO_8859_1
, C_ISO_8859_1
},
1126 {"en_DK" , C_ISO_8859_1
, C_ISO_8859_1
},
1127 {"en_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1128 {"en_HK" , C_ISO_8859_1
, C_ISO_8859_1
},
1129 {"en_IE" , C_ISO_8859_1
, C_ISO_8859_1
},
1130 {"en_NZ" , C_ISO_8859_1
, C_ISO_8859_1
},
1131 {"en_PH" , C_ISO_8859_1
, C_ISO_8859_1
},
1132 {"en_SG" , C_ISO_8859_1
, C_ISO_8859_1
},
1133 {"en_US" , C_ISO_8859_1
, C_ISO_8859_1
},
1134 {"en_ZA" , C_ISO_8859_1
, C_ISO_8859_1
},
1135 {"en_ZW" , C_ISO_8859_1
, C_ISO_8859_1
},
1136 {"es_AR" , C_ISO_8859_1
, C_ISO_8859_1
},
1137 {"es_BO" , C_ISO_8859_1
, C_ISO_8859_1
},
1138 {"es_CL" , C_ISO_8859_1
, C_ISO_8859_1
},
1139 {"es_CO" , C_ISO_8859_1
, C_ISO_8859_1
},
1140 {"es_CR" , C_ISO_8859_1
, C_ISO_8859_1
},
1141 {"es_DO" , C_ISO_8859_1
, C_ISO_8859_1
},
1142 {"es_EC" , C_ISO_8859_1
, C_ISO_8859_1
},
1143 {"es_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1144 {"es_GT" , C_ISO_8859_1
, C_ISO_8859_1
},
1145 {"es_HN" , C_ISO_8859_1
, C_ISO_8859_1
},
1146 {"es_MX" , C_ISO_8859_1
, C_ISO_8859_1
},
1147 {"es_NI" , C_ISO_8859_1
, C_ISO_8859_1
},
1148 {"es_PA" , C_ISO_8859_1
, C_ISO_8859_1
},
1149 {"es_PE" , C_ISO_8859_1
, C_ISO_8859_1
},
1150 {"es_PR" , C_ISO_8859_1
, C_ISO_8859_1
},
1151 {"es_PY" , C_ISO_8859_1
, C_ISO_8859_1
},
1152 {"es_SV" , C_ISO_8859_1
, C_ISO_8859_1
},
1153 {"es_US" , C_ISO_8859_1
, C_ISO_8859_1
},
1154 {"es_UY" , C_ISO_8859_1
, C_ISO_8859_1
},
1155 {"es_VE" , C_ISO_8859_1
, C_ISO_8859_1
},
1156 {"et_EE" , C_ISO_8859_1
, C_ISO_8859_1
},
1157 {"eu_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1158 {"fi_FI" , C_ISO_8859_1
, C_ISO_8859_1
},
1159 {"fo_FO" , C_ISO_8859_1
, C_ISO_8859_1
},
1160 {"fr_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1161 {"fr_CA" , C_ISO_8859_1
, C_ISO_8859_1
},
1162 {"fr_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1163 {"fr_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1164 {"fr_LU" , C_ISO_8859_1
, C_ISO_8859_1
},
1165 {"ga_IE" , C_ISO_8859_1
, C_ISO_8859_1
},
1166 {"gl_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1167 {"gv_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1168 {"id_ID" , C_ISO_8859_1
, C_ISO_8859_1
},
1169 {"is_IS" , C_ISO_8859_1
, C_ISO_8859_1
},
1170 {"it_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1171 {"it_IT" , C_ISO_8859_1
, C_ISO_8859_1
},
1172 {"kl_GL" , C_ISO_8859_1
, C_ISO_8859_1
},
1173 {"kw_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1174 {"ms_MY" , C_ISO_8859_1
, C_ISO_8859_1
},
1175 {"nl_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1176 {"nl_NL" , C_ISO_8859_1
, C_ISO_8859_1
},
1177 {"nb_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1178 {"nn_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1179 {"no_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1180 {"oc_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1181 {"pt_BR" , C_ISO_8859_1
, C_ISO_8859_1
},
1182 {"pt_PT" , C_ISO_8859_1
, C_ISO_8859_1
},
1183 {"sq_AL" , C_ISO_8859_1
, C_ISO_8859_1
},
1184 {"sv_FI" , C_ISO_8859_1
, C_ISO_8859_1
},
1185 {"sv_SE" , C_ISO_8859_1
, C_ISO_8859_1
},
1186 {"tl_PH" , C_ISO_8859_1
, C_ISO_8859_1
},
1187 {"uz_UZ" , C_ISO_8859_1
, C_ISO_8859_1
},
1188 {"wa_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1190 {"bs_BA" , C_ISO_8859_2
, C_ISO_8859_2
},
1191 {"cs_CZ" , C_ISO_8859_2
, C_ISO_8859_2
},
1192 {"hr_HR" , C_ISO_8859_2
, C_ISO_8859_2
},
1193 {"hu_HU" , C_ISO_8859_2
, C_ISO_8859_2
},
1194 {"pl_PL" , C_ISO_8859_2
, C_ISO_8859_2
},
1195 {"ro_RO" , C_ISO_8859_2
, C_ISO_8859_2
},
1196 {"sk_SK" , C_ISO_8859_2
, C_ISO_8859_2
},
1197 {"sl_SI" , C_ISO_8859_2
, C_ISO_8859_2
},
1199 {"sr_YU@cyrillic" , C_ISO_8859_5
, C_ISO_8859_5
},
1200 {"sr_YU" , C_ISO_8859_2
, C_ISO_8859_2
},
1202 {"mt_MT" , C_ISO_8859_3
, C_ISO_8859_3
},
1204 {"lt_LT.iso88594" , C_ISO_8859_4
, C_ISO_8859_4
},
1205 {"lt_LT.ISO8859-4" , C_ISO_8859_4
, C_ISO_8859_4
},
1206 {"lt_LT.ISO_8859-4" , C_ISO_8859_4
, C_ISO_8859_4
},
1207 {"lt_LT" , C_ISO_8859_13
, C_ISO_8859_13
},
1209 {"mk_MK" , C_ISO_8859_5
, C_ISO_8859_5
},
1211 {"ar_AE" , C_ISO_8859_6
, C_ISO_8859_6
},
1212 {"ar_BH" , C_ISO_8859_6
, C_ISO_8859_6
},
1213 {"ar_DZ" , C_ISO_8859_6
, C_ISO_8859_6
},
1214 {"ar_EG" , C_ISO_8859_6
, C_ISO_8859_6
},
1215 {"ar_IQ" , C_ISO_8859_6
, C_ISO_8859_6
},
1216 {"ar_JO" , C_ISO_8859_6
, C_ISO_8859_6
},
1217 {"ar_KW" , C_ISO_8859_6
, C_ISO_8859_6
},
1218 {"ar_LB" , C_ISO_8859_6
, C_ISO_8859_6
},
1219 {"ar_LY" , C_ISO_8859_6
, C_ISO_8859_6
},
1220 {"ar_MA" , C_ISO_8859_6
, C_ISO_8859_6
},
1221 {"ar_OM" , C_ISO_8859_6
, C_ISO_8859_6
},
1222 {"ar_QA" , C_ISO_8859_6
, C_ISO_8859_6
},
1223 {"ar_SA" , C_ISO_8859_6
, C_ISO_8859_6
},
1224 {"ar_SD" , C_ISO_8859_6
, C_ISO_8859_6
},
1225 {"ar_SY" , C_ISO_8859_6
, C_ISO_8859_6
},
1226 {"ar_TN" , C_ISO_8859_6
, C_ISO_8859_6
},
1227 {"ar_YE" , C_ISO_8859_6
, C_ISO_8859_6
},
1229 {"el_GR" , C_ISO_8859_7
, C_ISO_8859_7
},
1230 {"he_IL" , C_ISO_8859_8
, C_ISO_8859_8
},
1231 {"iw_IL" , C_ISO_8859_8
, C_ISO_8859_8
},
1232 {"tr_TR" , C_ISO_8859_9
, C_ISO_8859_9
},
1234 {"lv_LV" , C_ISO_8859_13
, C_ISO_8859_13
},
1235 {"mi_NZ" , C_ISO_8859_13
, C_ISO_8859_13
},
1237 {"cy_GB" , C_ISO_8859_14
, C_ISO_8859_14
},
1239 {"ar_IN" , C_UTF_8
, C_UTF_8
},
1240 {"en_IN" , C_UTF_8
, C_UTF_8
},
1241 {"se_NO" , C_UTF_8
, C_UTF_8
},
1242 {"ta_IN" , C_UTF_8
, C_UTF_8
},
1243 {"te_IN" , C_UTF_8
, C_UTF_8
},
1244 {"ur_PK" , C_UTF_8
, C_UTF_8
},
1246 {"th_TH" , C_TIS_620
, C_TIS_620
},
1247 /* {"th_TH" , C_WINDOWS_874}, */
1248 /* {"th_TH" , C_ISO_8859_11}, */
1250 {"ka_GE" , C_GEORGIAN_PS
, C_GEORGIAN_PS
},
1251 {"vi_VN.TCVN" , C_TCVN5712_1
, C_TCVN5712_1
},
1253 {"C" , C_US_ASCII
, C_US_ASCII
},
1254 {"POSIX" , C_US_ASCII
, C_US_ASCII
},
1255 {"ANSI_X3.4-1968" , C_US_ASCII
, C_US_ASCII
},
1258 static GHashTable
*conv_get_charset_to_str_table(void)
1260 static GHashTable
*table
;
1266 table
= g_hash_table_new(NULL
, g_direct_equal
);
1268 for (i
= 0; i
< sizeof(charsets
) / sizeof(charsets
[0]); i
++) {
1269 if (g_hash_table_lookup(table
, GUINT_TO_POINTER(charsets
[i
].charset
))
1272 (table
, GUINT_TO_POINTER(charsets
[i
].charset
),
1280 static GHashTable
*conv_get_charset_from_str_table(void)
1282 static GHashTable
*table
;
1288 table
= g_hash_table_new(str_case_hash
, str_case_equal
);
1290 for (i
= 0; i
< sizeof(charsets
) / sizeof(charsets
[0]); i
++) {
1291 g_hash_table_insert(table
, charsets
[i
].name
,
1292 GUINT_TO_POINTER(charsets
[i
].charset
));
1298 const gchar
*conv_get_charset_str(CharSet charset
)
1302 table
= conv_get_charset_to_str_table();
1303 return g_hash_table_lookup(table
, GUINT_TO_POINTER(charset
));
1306 CharSet
conv_get_charset_from_str(const gchar
*charset
)
1310 if (!charset
) return C_AUTO
;
1312 table
= conv_get_charset_from_str_table();
1313 return GPOINTER_TO_UINT(g_hash_table_lookup(table
, charset
));
1316 static CharSet
conv_get_locale_charset(void)
1318 static CharSet cur_charset
= -1;
1319 const gchar
*cur_locale
;
1323 if (cur_charset
!= -1)
1326 cur_locale
= conv_get_current_locale();
1328 cur_charset
= C_US_ASCII
;
1332 if (strcasestr(cur_locale
, "UTF-8") ||
1333 strcasestr(cur_locale
, "utf8")) {
1334 cur_charset
= C_UTF_8
;
1338 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1339 cur_charset
= C_ISO_8859_15
;
1343 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1346 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1347 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1348 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1349 strlen(locale_table
[i
].locale
))) {
1350 cur_charset
= locale_table
[i
].charset
;
1352 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1353 !strchr(p
+ 1, '.')) {
1354 if (strlen(cur_locale
) == 2 &&
1355 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1356 cur_charset
= locale_table
[i
].charset
;
1362 cur_charset
= C_AUTO
;
1366 static CharSet
conv_get_locale_charset_no_utf8(void)
1368 static CharSet cur_charset
= -1;
1369 const gchar
*cur_locale
;
1373 if (prefs_common
.broken_are_utf8
) {
1374 cur_charset
= C_UTF_8
;
1378 cur_locale
= conv_get_current_locale();
1380 cur_charset
= C_US_ASCII
;
1384 if (strcasestr(cur_locale
, "UTF-8") ||
1385 strcasestr(cur_locale
, "utf8")) {
1386 cur_charset
= C_UTF_8
;
1390 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1391 cur_charset
= C_ISO_8859_15
;
1395 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1398 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1399 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1400 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1401 strlen(locale_table
[i
].locale
))) {
1402 cur_charset
= locale_table
[i
].charset
;
1404 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1405 !strchr(p
+ 1, '.')) {
1406 if (strlen(cur_locale
) == 2 &&
1407 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1408 cur_charset
= locale_table
[i
].charset
;
1414 cur_charset
= C_AUTO
;
1418 const gchar
*conv_get_locale_charset_str(void)
1420 static const gchar
*codeset
= NULL
;
1423 codeset
= conv_get_charset_str(conv_get_locale_charset());
1425 return codeset
? codeset
: CS_INTERNAL
;
1428 const gchar
*conv_get_locale_charset_str_no_utf8(void)
1430 static const gchar
*codeset
= NULL
;
1433 codeset
= conv_get_charset_str(conv_get_locale_charset_no_utf8());
1435 return codeset
? codeset
: CS_INTERNAL
;
1438 static CharSet
conv_get_outgoing_charset(void)
1440 static CharSet out_charset
= -1;
1441 const gchar
*cur_locale
;
1445 if (out_charset
!= -1)
1448 cur_locale
= conv_get_current_locale();
1450 out_charset
= C_AUTO
;
1454 if (strcasestr(cur_locale
, "UTF-8") ||
1455 strcasestr(cur_locale
, "utf8")) {
1456 out_charset
= C_UTF_8
;
1460 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1461 out_charset
= C_ISO_8859_15
;
1465 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1468 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1469 strlen(locale_table
[i
].locale
))) {
1470 out_charset
= locale_table
[i
].out_charset
;
1472 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1473 !strchr(p
+ 1, '.')) {
1474 if (strlen(cur_locale
) == 2 &&
1475 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1476 out_charset
= locale_table
[i
].out_charset
;
1485 const gchar
*conv_get_outgoing_charset_str(void)
1487 CharSet out_charset
;
1490 out_charset
= conv_get_outgoing_charset();
1491 str
= conv_get_charset_str(out_charset
);
1493 return str
? str
: CS_UTF_8
;
1496 const gchar
*conv_get_current_locale(void)
1498 const gchar
*cur_locale
;
1501 cur_locale
= g_win32_getlocale();
1503 cur_locale
= g_getenv("LC_ALL");
1504 if (!cur_locale
) cur_locale
= g_getenv("LC_CTYPE");
1505 if (!cur_locale
) cur_locale
= g_getenv("LANG");
1506 if (!cur_locale
) cur_locale
= setlocale(LC_CTYPE
, NULL
);
1507 #endif /* G_OS_WIN32 */
1509 debug_print("current locale: %s\n",
1510 cur_locale
? cur_locale
: "(none)");
1515 static gboolean
conv_is_ja_locale(void)
1517 static gint is_ja_locale
= -1;
1518 const gchar
*cur_locale
;
1520 if (is_ja_locale
!= -1)
1521 return is_ja_locale
!= 0;
1524 cur_locale
= conv_get_current_locale();
1526 if (g_ascii_strncasecmp(cur_locale
, "ja", 2) == 0)
1530 return is_ja_locale
!= 0;
1533 gchar
*conv_unmime_header(const gchar
*str
, const gchar
*default_encoding
,
1534 gboolean addr_field
)
1536 gchar buf
[BUFFSIZE
];
1538 if (is_ascii_str(str
))
1539 return unmime_header(str
, addr_field
);
1541 if (default_encoding
) {
1544 utf8_buf
= conv_codeset_strdup
1545 (str
, default_encoding
, CS_INTERNAL
);
1549 decoded_str
= unmime_header(utf8_buf
, addr_field
);
1555 if (conv_is_ja_locale())
1556 conv_anytodisp(buf
, sizeof(buf
), str
);
1558 conv_localetodisp(buf
, sizeof(buf
), str
);
1560 return unmime_header(buf
, addr_field
);
/*
 * Constants and helper macros for the header encoder below.
 *
 * MAX_LINELEN / MAX_HARD_LINELEN: line length limits (76 and 996) used
 *   when deciding where to fold the output.
 * MIMESEP_BEGIN / MIMESEP_END: the "=?" and "?=" delimiters written
 *   around each encoded block.
 * LBREAK_IF_REQUIRED(cond, is_plain_text): statement macro that expects
 *   the locals len, dest, destp, srcp and left to be in scope at the
 *   point of use.  It first checks that more than MAX_LINELEN + 2 bytes
 *   of dest remain, then, when cond holds and input is left, handles
 *   whitespace around the break point and resets left to
 *   MAX_LINELEN - 1.
 *   NOTE(review): the statements that emit the actual line break and the
 *   early-exit path are not shown here -- confirm against the full
 *   definition.
 * B64LEN(len): length of the Base64 encoding of len bytes (4 characters
 *   per started group of 3 bytes).
 */
1563 #define MAX_LINELEN 76
1564 #define MAX_HARD_LINELEN 996
1565 #define MIMESEP_BEGIN "=?"
1566 #define MIMESEP_END "?="
1568 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1570 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1575 if ((cond) && *srcp) { \
1576 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1577 if (isspace(*(destp - 1))) \
1579 else if (is_plain_text && isspace(*srcp)) \
1584 left = MAX_LINELEN - 1; \
1586 } else if (destp == (guchar *)dest && left < 7) { \
1587 if (is_plain_text && isspace(*srcp)) \
1592 left = MAX_LINELEN - 1; \
1598 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * Encode a UTF-8 header value into dest, folding lines and wrapping
 * non-ASCII runs in MIME encoded blocks of the form
 * MIMESEP_BEGIN charset ?B? or ?Q? data MIMESEP_END.
 *
 * dest:          output buffer.
 * len:           size of dest in bytes.
 * src:           header text; must be valid UTF-8.
 * header_len:    number of columns already used on the first line; the
 *                initial line budget is MAX_LINELEN - header_len.
 * addr_field:    when set, parentheses and double quotes are kept out of
 *                the encoded blocks.
 * out_encoding_: target charset name; conv_get_outgoing_charset_str() is
 *                the alternative source.
 *                NOTE(review): presumably the fallback applies when this
 *                is NULL -- the selecting condition is not shown here.
 *
 * Returns nothing.  The function returns early, via cm_return_if_fail,
 * when src is not valid UTF-8 or dest is NULL.
 *
 * Words for which is_next_nonascii() is false are copied as they are.
 * Other runs are converted from CS_INTERNAL to the output charset with
 * conv_codeset_strdup(); if that fails a warning is logged and the text
 * is passed through conv_unreadable_8bit() and used as it is.  The
 * converted bytes are then encoded with g_base64_encode() or
 * qp_q_encode().
 */
1600 void conv_encode_header_full(gchar
*dest
, gint len
, const gchar
*src
,
1601 gint header_len
, gboolean addr_field
,
1602 const gchar
*out_encoding_
)
1604 const gchar
*cur_encoding
;
1605 const gchar
*out_encoding
;
1609 const guchar
*srcp
= src
;
1610 guchar
*destp
= dest
;
1611 gboolean use_base64
;
/* Refuse invalid UTF-8 input and a NULL destination up front. */
1613 cm_return_if_fail(g_utf8_validate(src
, -1, NULL
) == TRUE
);
1614 cm_return_if_fail(destp
!= NULL
);
/* NOTE(review): the ?B? marker is chosen under MB_CUR_MAX > 1 and ?Q?
   presumably in the else branch -- confirm, the else line is not shown. */
1616 if (MB_CUR_MAX
> 1) {
1618 mimesep_enc
= "?B?";
1621 mimesep_enc
= "?Q?";
1624 cur_encoding
= CS_INTERNAL
;
1627 out_encoding
= out_encoding_
;
1629 out_encoding
= conv_get_outgoing_charset_str();
/* An output charset of CS_US_ASCII is replaced by CS_ISO_8859_1. */
1631 if (!strcmp(out_encoding
, CS_US_ASCII
))
1632 out_encoding
= CS_ISO_8859_1
;
/* Fixed overhead of one encoded block: both delimiters, the charset
   name and the encoding marker. */
1634 mimestr_len
= strlen(MIMESEP_BEGIN
) + strlen(out_encoding
) +
1635 strlen(mimesep_enc
) + strlen(MIMESEP_END
);
1637 left
= MAX_LINELEN
- header_len
;
1640 LBREAK_IF_REQUIRED(left
<= 0, TRUE
);
1642 while (isspace(*srcp
)) {
1645 LBREAK_IF_REQUIRED(left
<= 0, TRUE
);
1648 /* output as it is if the next word is ASCII string */
1649 if (!is_next_nonascii(srcp
)) {
1652 word_len
= get_next_word_len(srcp
);
1653 LBREAK_IF_REQUIRED(left
< word_len
, TRUE
);
1654 while (word_len
> 0) {
1655 LBREAK_IF_REQUIRED(left
+ (MAX_HARD_LINELEN
- MAX_LINELEN
) <= 0, TRUE
)
1664 /* don't include parentheses and quotes in encoded strings */
1665 if (addr_field
&& (*srcp
== '(' || *srcp
== ')' || *srcp
== '"')) {
1666 LBREAK_IF_REQUIRED(left
< 2, FALSE
);
1677 const guchar
*p
= srcp
;
1679 gint out_enc_str_len
;
1680 gint mime_block_len
;
1681 gboolean cont
= FALSE
;
1683 while (*p
!= '\0') {
1684 if (isspace(*p
) && !is_next_nonascii(p
+ 1))
1686 /* don't include parentheses in encoded
1688 if (addr_field
&& (*p
== '(' || *p
== ')' || *p
== '"'))
1691 mb_len
= g_utf8_skip
[*p
];
1693 Xstrndup_a(part_str
, srcp
, cur_len
+ mb_len
, );
1694 out_str
= conv_codeset_strdup
1695 (part_str
, cur_encoding
, out_encoding
);
1701 g_warning("conv_encode_header(): code conversion failed");
1702 conv_unreadable_8bit(part_str
);
1703 out_str
= g_strdup(part_str
);
1706 out_str_len
= strlen(out_str
);
1709 out_enc_str_len
= B64LEN(out_str_len
);
1712 qp_get_q_encoding_len(out_str
);
1716 if (mimestr_len
+ out_enc_str_len
<= left
) {
1719 } else if (cur_len
== 0) {
1721 LBREAK_IF_REQUIRED(1, FALSE
);
1730 Xstrndup_a(part_str
, srcp
, cur_len
, );
1731 out_str
= conv_codeset_strdup
1732 (part_str
, cur_encoding
, out_encoding
);
1734 g_warning("conv_encode_header(): code conversion failed");
1735 conv_unreadable_8bit(part_str
);
1736 out_str
= g_strdup(part_str
);
1738 out_str_len
= strlen(out_str
);
1741 out_enc_str_len
= B64LEN(out_str_len
);
1744 qp_get_q_encoding_len(out_str
);
1747 enc_str
= g_base64_encode(out_str
, out_str_len
);
1749 Xalloca(enc_str
, out_enc_str_len
+ 1, );
1750 qp_q_encode(enc_str
, out_str
);
1755 /* output MIME-encoded string block */
1756 mime_block_len
= mimestr_len
+ strlen(enc_str
);
1757 g_snprintf(destp
, mime_block_len
+ 1,
1758 MIMESEP_BEGIN
"%s%s%s" MIMESEP_END
,
1759 out_encoding
, mimesep_enc
, enc_str
);
/* Advance the write cursor and shrink the remaining line budget by the
   size of the block just written. */
1764 destp
+= mime_block_len
;
1767 left
-= mime_block_len
;
1770 LBREAK_IF_REQUIRED(cont
, FALSE
);
1780 void conv_encode_header(gchar
*dest
, gint len
, const gchar
*src
,
1781 gint header_len
, gboolean addr_field
)
1783 conv_encode_header_full(dest
,len
,src
,header_len
,addr_field
,NULL
);
1786 #undef LBREAK_IF_REQUIRED
1789 gchar
*conv_filename_from_utf8(const gchar
*utf8_file
)
1792 GError
*error
= NULL
;
1794 fs_file
= g_filename_from_utf8(utf8_file
, -1, NULL
, NULL
, &error
);
1796 debug_print("failed to convert encoding of file name: %s\n",
1798 g_error_free(error
);
1801 fs_file
= g_strdup(utf8_file
);
1806 gchar
*conv_filename_to_utf8(const gchar
*fs_file
)
1808 gchar
*utf8_file
= NULL
;
1809 GError
*error
= NULL
;
1811 utf8_file
= g_filename_to_utf8(fs_file
, -1, NULL
, NULL
, &error
);
1813 g_warning("failed to convert encoding of file name: %s",
1815 g_error_free(error
);
1818 if (!utf8_file
|| !g_utf8_validate(utf8_file
, -1, NULL
)) {
1820 utf8_file
= g_strdup(fs_file
);
1821 conv_unreadable_8bit(utf8_file
);