2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2007 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 #include <glib/gi18n.h>
40 #include "quoted-printable.h"
42 #include "prefs_common.h"
/* For unknown reasons the iconv.m4 macro undefines ICONV_CONST if no
   const is needed. This would break the code below, so we define it. */
/* Substitute byte ('_') stored in place of bytes that cannot be shown or
   converted.  Deliberately has no trailing semicolon so that the macro
   expands to a plain expression and can be used inside larger expressions. */
#define SUBST_CHAR	0x5f
/*
 * Byte-class predicates used by the Japanese converters.  Every predicate
 * looks only at the low eight bits of its argument.  The argument is expanded
 * more than once, so pass expressions without side effects.
 */
#define CONV_LOW_BYTE(c)		((c) & 0xff)
#define CONV_BYTE_BETWEEN(c, lo, hi) \
	(CONV_LOW_BYTE(c) >= (lo) && CONV_LOW_BYTE(c) <= (hi))

/* EUC: 0xa1..0xfe */
#define iseuckanji(c)	CONV_BYTE_BETWEEN(c, 0xa1, 0xfe)
/* EUC half-width kana, first byte: 0x8e */
#define iseuchwkana1(c)	(CONV_LOW_BYTE(c) == 0x8e)
/* EUC half-width kana, second byte: 0xa1..0xdf */
#define iseuchwkana2(c)	CONV_BYTE_BETWEEN(c, 0xa1, 0xdf)
/* EUC auxiliary-set prefix byte: 0x8f */
#define iseucaux(c)	(CONV_LOW_BYTE(c) == 0x8f)
/* Shift_JIS, first byte: 0x81..0x9f or 0xe0..0xfc */
#define issjiskanji1(c) \
	(CONV_BYTE_BETWEEN(c, 0x81, 0x9f) || CONV_BYTE_BETWEEN(c, 0xe0, 0xfc))
/* Shift_JIS, second byte: 0x40..0x7e or 0x80..0xfc */
#define issjiskanji2(c) \
	(CONV_BYTE_BETWEEN(c, 0x40, 0x7e) || CONV_BYTE_BETWEEN(c, 0x80, 0xfc))
/* Shift_JIS half-width kana: 0xa1..0xdf */
#define issjishwkana(c)	CONV_BYTE_BETWEEN(c, 0xa1, 0xdf)
79 if (state != JIS_KANJI) { \
87 if (state != JIS_ASCII) { \
95 if (state != JIS_HWKANA) { \
103 if (state != JIS_AUXKANJI) { \
108 state = JIS_AUXKANJI; \
111 static CodeConvFunc
conv_get_code_conv_func (const gchar
*src_charset_str
,
112 const gchar
*dest_charset_str
);
114 static gchar
*conv_iconv_strdup_with_cd (const gchar
*inbuf
,
117 static CharSet
conv_get_locale_charset (void);
118 static CharSet
conv_get_outgoing_charset (void);
119 static CharSet
conv_guess_ja_encoding(const gchar
*str
);
120 static gboolean
conv_is_ja_locale (void);
122 static void conv_jistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
123 static void conv_euctojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
124 static void conv_sjistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
126 static void conv_jistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
127 static void conv_sjistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
128 static void conv_euctoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
129 static void conv_anytoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
131 static void conv_utf8toeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
132 static void conv_utf8tojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
134 static void conv_unreadable_8bit(gchar
*str
);
136 static void conv_jistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
137 static void conv_sjistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
138 static void conv_euctodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
140 static void conv_anytodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
141 static void conv_ustodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
142 static void conv_noconv(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
144 static gboolean strict_mode
= FALSE
;
146 void codeconv_set_strict(gboolean mode
)
151 static void conv_jistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
153 const guchar
*in
= inbuf
;
154 guchar
*out
= outbuf
;
155 JISState state
= JIS_ASCII
;
157 while (*in
!= '\0') {
161 if (*(in
+ 1) == '@' || *(in
+ 1) == 'B') {
164 } else if (*(in
+ 1) == '(' &&
166 state
= JIS_AUXKANJI
;
169 /* unknown escape sequence */
172 } else if (*in
== '(') {
173 if (*(in
+ 1) == 'B' || *(in
+ 1) == 'J') {
176 } else if (*(in
+ 1) == 'I') {
180 /* unknown escape sequence */
184 /* unknown escape sequence */
187 } else if (*in
== 0x0e) {
190 } else if (*in
== 0x0f) {
199 *out
++ = *in
++ | 0x80;
200 if (*in
== '\0') break;
201 *out
++ = *in
++ | 0x80;
205 *out
++ = *in
++ | 0x80;
209 *out
++ = *in
++ | 0x80;
210 if (*in
== '\0') break;
211 *out
++ = *in
++ | 0x80;
220 #define JIS_HWDAKUTEN 0x5e
221 #define JIS_HWHANDAKUTEN 0x5f
223 static gint
conv_jis_hantozen(guchar
*outbuf
, guchar jis_code
, guchar sound_sym
)
225 static guint16 h2z_tbl
[] = {
227 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
228 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
230 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
231 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
233 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
234 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
236 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
237 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
240 static guint16 dakuten_tbl
[] = {
242 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
243 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
245 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
246 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
249 static guint16 handakuten_tbl
[] = {
251 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
259 if (jis_code
< 0x21 || jis_code
> 0x5f)
262 if (sound_sym
== JIS_HWDAKUTEN
&&
263 jis_code
>= 0x36 && jis_code
<= 0x4e) {
264 out_code
= dakuten_tbl
[jis_code
- 0x30];
266 *outbuf
= out_code
>> 8;
267 *(outbuf
+ 1) = out_code
& 0xff;
272 if (sound_sym
== JIS_HWHANDAKUTEN
&&
273 jis_code
>= 0x4a && jis_code
<= 0x4e) {
274 out_code
= handakuten_tbl
[jis_code
- 0x4a];
275 *outbuf
= out_code
>> 8;
276 *(outbuf
+ 1) = out_code
& 0xff;
280 out_code
= h2z_tbl
[jis_code
- 0x20];
281 *outbuf
= out_code
>> 8;
282 *(outbuf
+ 1) = out_code
& 0xff;
286 static void conv_euctojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
288 const guchar
*in
= inbuf
;
289 guchar
*out
= outbuf
;
290 JISState state
= JIS_ASCII
;
292 while (*in
!= '\0') {
296 } else if (iseuckanji(*in
)) {
297 if (iseuckanji(*(in
+ 1))) {
299 *out
++ = *in
++ & 0x7f;
300 *out
++ = *in
++ & 0x7f;
305 if (*in
!= '\0' && !IS_ASCII(*in
)) {
310 } else if (iseuchwkana1(*in
)) {
311 if (iseuchwkana2(*(in
+ 1))) {
312 if (prefs_common
.allow_jisx0201_kana
) {
315 *out
++ = *in
++ & 0x7f;
320 if (iseuchwkana1(*(in
+ 2)) &&
321 iseuchwkana2(*(in
+ 3)))
322 len
= conv_jis_hantozen
324 *(in
+ 1), *(in
+ 3));
326 len
= conv_jis_hantozen
341 if (*in
!= '\0' && !IS_ASCII(*in
)) {
346 } else if (iseucaux(*in
)) {
348 if (iseuckanji(*in
) && iseuckanji(*(in
+ 1))) {
350 *out
++ = *in
++ & 0x7f;
351 *out
++ = *in
++ & 0x7f;
354 if (*in
!= '\0' && !IS_ASCII(*in
)) {
357 if (*in
!= '\0' && !IS_ASCII(*in
)) {
374 static void conv_sjistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
376 const guchar
*in
= inbuf
;
377 guchar
*out
= outbuf
;
379 while (*in
!= '\0') {
382 } else if (issjiskanji1(*in
)) {
383 if (issjiskanji2(*(in
+ 1))) {
385 guchar out2
= *(in
+ 1);
388 row
= out1
< 0xa0 ? 0x70 : 0xb0;
390 out1
= (out1
- row
) * 2 - 1;
391 out2
-= out2
> 0x7f ? 0x20 : 0x1f;
393 out1
= (out1
- row
) * 2;
397 *out
++ = out1
| 0x80;
398 *out
++ = out2
| 0x80;
403 if (*in
!= '\0' && !IS_ASCII(*in
)) {
408 } else if (issjishwkana(*in
)) {
420 static void conv_jistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
424 Xalloca(eucstr
, outlen
, return);
426 conv_jistoeuc(eucstr
, outlen
, inbuf
);
427 conv_euctoutf8(outbuf
, outlen
, eucstr
);
430 static void conv_sjistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
434 tmpstr
= conv_iconv_strdup(inbuf
, CS_SHIFT_JIS
, CS_UTF_8
);
436 strncpy2(outbuf
, tmpstr
, outlen
);
439 strncpy2(outbuf
, inbuf
, outlen
);
442 static void conv_euctoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
444 static iconv_t cd
= (iconv_t
)-1;
445 static gboolean iconv_ok
= TRUE
;
448 if (cd
== (iconv_t
)-1) {
450 strncpy2(outbuf
, inbuf
, outlen
);
453 cd
= iconv_open(CS_UTF_8
, CS_EUC_JP_MS
);
454 if (cd
== (iconv_t
)-1) {
455 cd
= iconv_open(CS_UTF_8
, CS_EUC_JP
);
456 if (cd
== (iconv_t
)-1) {
457 g_warning("conv_euctoutf8(): %s\n",
460 strncpy2(outbuf
, inbuf
, outlen
);
466 tmpstr
= conv_iconv_strdup_with_cd(inbuf
, cd
);
468 strncpy2(outbuf
, tmpstr
, outlen
);
471 strncpy2(outbuf
, inbuf
, outlen
);
474 static void conv_anytoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
476 switch (conv_guess_ja_encoding(inbuf
)) {
478 conv_jistoutf8(outbuf
, outlen
, inbuf
);
481 conv_sjistoutf8(outbuf
, outlen
, inbuf
);
484 conv_euctoutf8(outbuf
, outlen
, inbuf
);
487 strncpy2(outbuf
, inbuf
, outlen
);
492 static void conv_utf8toeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
494 static iconv_t cd
= (iconv_t
)-1;
495 static gboolean iconv_ok
= TRUE
;
498 if (cd
== (iconv_t
)-1) {
500 strncpy2(outbuf
, inbuf
, outlen
);
503 cd
= iconv_open(CS_EUC_JP_MS
, CS_UTF_8
);
504 if (cd
== (iconv_t
)-1) {
505 cd
= iconv_open(CS_EUC_JP
, CS_UTF_8
);
506 if (cd
== (iconv_t
)-1) {
507 g_warning("conv_utf8toeuc(): %s\n",
510 strncpy2(outbuf
, inbuf
, outlen
);
516 tmpstr
= conv_iconv_strdup_with_cd(inbuf
, cd
);
518 strncpy2(outbuf
, tmpstr
, outlen
);
521 strncpy2(outbuf
, inbuf
, outlen
);
524 static void conv_utf8tojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
528 Xalloca(eucstr
, outlen
, return);
530 conv_utf8toeuc(eucstr
, outlen
, inbuf
);
531 conv_euctojis(outbuf
, outlen
, eucstr
);
534 static void conv_unreadable_8bit(gchar
*str
)
536 register guchar
*p
= str
;
539 /* convert CR+LF -> LF */
540 if (*p
== '\r' && *(p
+ 1) == '\n')
541 memmove(p
, p
+ 1, strlen(p
));
542 else if (!IS_ASCII(*p
)) *p
= SUBST_CHAR
;
547 static CharSet
conv_guess_ja_encoding(const gchar
*str
)
549 const guchar
*p
= str
;
550 CharSet guessed
= C_US_ASCII
;
553 if (*p
== ESC
&& (*(p
+ 1) == '$' || *(p
+ 1) == '(')) {
554 if (guessed
== C_US_ASCII
)
555 return C_ISO_2022_JP
;
557 } else if (IS_ASCII(*p
)) {
559 } else if (iseuckanji(*p
) && iseuckanji(*(p
+ 1))) {
560 if (*p
>= 0xfd && *p
<= 0xfe)
562 else if (guessed
== C_SHIFT_JIS
) {
563 if ((issjiskanji1(*p
) &&
564 issjiskanji2(*(p
+ 1))) ||
566 guessed
= C_SHIFT_JIS
;
572 } else if (issjiskanji1(*p
) && issjiskanji2(*(p
+ 1))) {
573 if (iseuchwkana1(*p
) && iseuchwkana2(*(p
+ 1)))
574 guessed
= C_SHIFT_JIS
;
578 } else if (issjishwkana(*p
)) {
579 guessed
= C_SHIFT_JIS
;
589 static void conv_jistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
591 conv_jistoutf8(outbuf
, outlen
, inbuf
);
594 static void conv_sjistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
596 conv_sjistoutf8(outbuf
, outlen
, inbuf
);
599 static void conv_euctodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
601 conv_euctoutf8(outbuf
, outlen
, inbuf
);
604 void conv_utf8todisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
606 if (g_utf8_validate(inbuf
, -1, NULL
) == TRUE
)
607 strncpy2(outbuf
, inbuf
, outlen
);
609 conv_ustodisp(outbuf
, outlen
, inbuf
);
612 static void conv_anytodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
614 conv_anytoutf8(outbuf
, outlen
, inbuf
);
615 if (g_utf8_validate(outbuf
, -1, NULL
) != TRUE
)
616 conv_unreadable_8bit(outbuf
);
619 static void conv_ustodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
621 strncpy2(outbuf
, inbuf
, outlen
);
622 conv_unreadable_8bit(outbuf
);
625 void conv_localetodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
629 codeconv_set_strict(TRUE
);
630 tmpstr
= conv_iconv_strdup(inbuf
, conv_get_locale_charset_str(),
632 codeconv_set_strict(FALSE
);
633 if (tmpstr
&& g_utf8_validate(tmpstr
, -1, NULL
)) {
634 strncpy2(outbuf
, tmpstr
, outlen
);
637 } else if (tmpstr
&& !g_utf8_validate(tmpstr
, -1, NULL
)) {
639 codeconv_set_strict(TRUE
);
640 tmpstr
= conv_iconv_strdup(inbuf
,
641 conv_get_locale_charset_str_no_utf8(),
643 codeconv_set_strict(FALSE
);
645 if (tmpstr
&& g_utf8_validate(tmpstr
, -1, NULL
)) {
646 strncpy2(outbuf
, tmpstr
, outlen
);
651 conv_utf8todisp(outbuf
, outlen
, inbuf
);
655 static void conv_noconv(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
657 strncpy2(outbuf
, inbuf
, outlen
);
661 conv_get_fallback_for_private_encoding(const gchar
*encoding
)
663 if (encoding
&& (encoding
[0] == 'X' || encoding
[0] == 'x') &&
664 encoding
[1] == '-') {
665 if (!g_ascii_strcasecmp(encoding
, CS_X_GBK
))
672 CodeConverter
*conv_code_converter_new(const gchar
*src_charset
)
676 src_charset
= conv_get_fallback_for_private_encoding(src_charset
);
678 conv
= g_new0(CodeConverter
, 1);
679 conv
->code_conv_func
= conv_get_code_conv_func(src_charset
, NULL
);
680 conv
->charset_str
= g_strdup(src_charset
);
681 conv
->charset
= conv_get_charset_from_str(src_charset
);
686 void conv_code_converter_destroy(CodeConverter
*conv
)
688 g_free(conv
->charset_str
);
692 gint
conv_convert(CodeConverter
*conv
, gchar
*outbuf
, gint outlen
,
695 if (conv
->code_conv_func
!= conv_noconv
)
696 conv
->code_conv_func(outbuf
, outlen
, inbuf
);
700 str
= conv_iconv_strdup(inbuf
, conv
->charset_str
, NULL
);
704 strncpy2(outbuf
, str
, outlen
);
712 gchar
*conv_codeset_strdup(const gchar
*inbuf
,
713 const gchar
*src_code
, const gchar
*dest_code
)
717 CodeConvFunc conv_func
;
719 src_code
= conv_get_fallback_for_private_encoding(src_code
);
720 conv_func
= conv_get_code_conv_func(src_code
, dest_code
);
721 if (conv_func
!= conv_noconv
) {
722 len
= (strlen(inbuf
) + 1) * 3;
724 if (!buf
) return NULL
;
726 conv_func(buf
, len
, inbuf
);
727 return g_realloc(buf
, strlen(buf
) + 1);
730 return conv_iconv_strdup(inbuf
, src_code
, dest_code
);
733 static CodeConvFunc
conv_get_code_conv_func(const gchar
*src_charset_str
,
734 const gchar
*dest_charset_str
)
736 CodeConvFunc code_conv
= conv_noconv
;
738 CharSet dest_charset
;
740 if (!src_charset_str
)
741 src_charset
= conv_get_locale_charset();
743 src_charset
= conv_get_charset_from_str(src_charset_str
);
745 /* auto detection mode */
746 if (!src_charset_str
&& !dest_charset_str
) {
747 if (conv_is_ja_locale())
748 return conv_anytodisp
;
753 dest_charset
= conv_get_charset_from_str(dest_charset_str
);
755 if (dest_charset
== C_US_ASCII
)
756 return conv_ustodisp
;
758 switch (src_charset
) {
776 case C_ISO_2022_JP_2
:
777 case C_ISO_2022_JP_3
:
778 if (dest_charset
== C_AUTO
)
779 code_conv
= conv_jistodisp
;
780 else if (dest_charset
== C_EUC_JP
)
781 code_conv
= conv_jistoeuc
;
782 else if (dest_charset
== C_UTF_8
)
783 code_conv
= conv_jistoutf8
;
786 if (dest_charset
== C_AUTO
)
787 code_conv
= conv_sjistodisp
;
788 else if (dest_charset
== C_EUC_JP
)
789 code_conv
= conv_sjistoeuc
;
790 else if (dest_charset
== C_UTF_8
)
791 code_conv
= conv_sjistoutf8
;
794 if (dest_charset
== C_AUTO
)
795 code_conv
= conv_euctodisp
;
796 else if (dest_charset
== C_ISO_2022_JP
||
797 dest_charset
== C_ISO_2022_JP_2
||
798 dest_charset
== C_ISO_2022_JP_3
)
799 code_conv
= conv_euctojis
;
800 else if (dest_charset
== C_UTF_8
)
801 code_conv
= conv_euctoutf8
;
804 if (dest_charset
== C_EUC_JP
)
805 code_conv
= conv_utf8toeuc
;
806 else if (dest_charset
== C_ISO_2022_JP
||
807 dest_charset
== C_ISO_2022_JP_2
||
808 dest_charset
== C_ISO_2022_JP_3
)
809 code_conv
= conv_utf8tojis
;
818 gchar
*conv_iconv_strdup(const gchar
*inbuf
,
819 const gchar
*src_code
, const gchar
*dest_code
)
824 if (!src_code
&& !dest_code
&&
825 g_utf8_validate(inbuf
, -1, NULL
))
826 return g_strdup(inbuf
);
829 src_code
= conv_get_outgoing_charset_str();
831 dest_code
= CS_INTERNAL
;
833 /* don't convert if src and dest codeset are identical */
834 if (!strcasecmp(src_code
, dest_code
))
835 return g_strdup(inbuf
);
/* don't convert if src codeset is US-ASCII */
838 if (!strcasecmp(src_code
, CS_US_ASCII
))
839 return g_strdup(inbuf
);
841 /* don't convert if dest codeset is US-ASCII */
842 if (!strcasecmp(dest_code
, CS_US_ASCII
))
843 return g_strdup(inbuf
);
845 cd
= iconv_open(dest_code
, src_code
);
846 if (cd
== (iconv_t
)-1)
849 outbuf
= conv_iconv_strdup_with_cd(inbuf
, cd
);
856 gchar
*conv_iconv_strdup_with_cd(const gchar
*inbuf
, iconv_t cd
)
858 const gchar
*inbuf_p
;
869 in_size
= strlen(inbuf
);
871 out_size
= (in_size
+ 1) * 2;
872 outbuf
= g_malloc(out_size
);
876 #define EXPAND_BUF() \
878 len = outbuf_p - outbuf; \
880 outbuf = g_realloc(outbuf, out_size); \
881 outbuf_p = outbuf + len; \
882 out_left = out_size - len; \
885 while ((n_conv
= iconv(cd
, (ICONV_CONST gchar
**)&inbuf_p
, &in_left
,
886 &outbuf_p
, &out_left
)) == (size_t)-1) {
887 if (EILSEQ
== errno
) {
892 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
898 *outbuf_p
++ = SUBST_CHAR
;
900 } else if (EINVAL
== errno
) {
902 } else if (E2BIG
== errno
) {
905 g_warning("conv_iconv_strdup(): %s\n",
911 while ((n_conv
= iconv(cd
, NULL
, NULL
, &outbuf_p
, &out_left
)) ==
913 if (E2BIG
== errno
) {
916 g_warning("conv_iconv_strdup(): %s\n",
924 len
= outbuf_p
- outbuf
;
925 outbuf
= g_realloc(outbuf
, len
+ 1);
931 static const struct {
935 {C_US_ASCII
, CS_US_ASCII
},
936 {C_US_ASCII
, CS_ANSI_X3_4_1968
},
939 {C_ISO_8859_1
, CS_ISO_8859_1
},
940 {C_ISO_8859_2
, CS_ISO_8859_2
},
941 {C_ISO_8859_3
, CS_ISO_8859_3
},
942 {C_ISO_8859_4
, CS_ISO_8859_4
},
943 {C_ISO_8859_5
, CS_ISO_8859_5
},
944 {C_ISO_8859_6
, CS_ISO_8859_6
},
945 {C_ISO_8859_7
, CS_ISO_8859_7
},
946 {C_ISO_8859_8
, CS_ISO_8859_8
},
947 {C_ISO_8859_9
, CS_ISO_8859_9
},
948 {C_ISO_8859_10
, CS_ISO_8859_10
},
949 {C_ISO_8859_11
, CS_ISO_8859_11
},
950 {C_ISO_8859_13
, CS_ISO_8859_13
},
951 {C_ISO_8859_14
, CS_ISO_8859_14
},
952 {C_ISO_8859_15
, CS_ISO_8859_15
},
953 {C_BALTIC
, CS_BALTIC
},
954 {C_CP1250
, CS_CP1250
},
955 {C_CP1251
, CS_CP1251
},
956 {C_CP1252
, CS_CP1252
},
957 {C_CP1253
, CS_CP1253
},
958 {C_CP1254
, CS_CP1254
},
959 {C_CP1255
, CS_CP1255
},
960 {C_CP1256
, CS_CP1256
},
961 {C_CP1257
, CS_CP1257
},
962 {C_CP1258
, CS_CP1258
},
963 {C_WINDOWS_1250
, CS_WINDOWS_1250
},
964 {C_WINDOWS_1251
, CS_WINDOWS_1251
},
965 {C_WINDOWS_1252
, CS_WINDOWS_1252
},
966 {C_WINDOWS_1253
, CS_WINDOWS_1253
},
967 {C_WINDOWS_1254
, CS_WINDOWS_1254
},
968 {C_WINDOWS_1255
, CS_WINDOWS_1255
},
969 {C_WINDOWS_1256
, CS_WINDOWS_1256
},
970 {C_WINDOWS_1257
, CS_WINDOWS_1257
},
971 {C_WINDOWS_1258
, CS_WINDOWS_1258
},
972 {C_KOI8_R
, CS_KOI8_R
},
973 {C_KOI8_T
, CS_KOI8_T
},
974 {C_KOI8_U
, CS_KOI8_U
},
975 {C_ISO_2022_JP
, CS_ISO_2022_JP
},
976 {C_ISO_2022_JP_2
, CS_ISO_2022_JP_2
},
977 {C_ISO_2022_JP_3
, CS_ISO_2022_JP_3
},
978 {C_EUC_JP
, CS_EUC_JP
},
979 {C_EUC_JP
, CS_EUCJP
},
980 {C_EUC_JP_MS
, CS_EUC_JP_MS
},
981 {C_SHIFT_JIS
, CS_SHIFT_JIS
},
982 {C_SHIFT_JIS
, CS_SHIFT__JIS
},
983 {C_SHIFT_JIS
, CS_SJIS
},
984 {C_ISO_2022_KR
, CS_ISO_2022_KR
},
985 {C_EUC_KR
, CS_EUC_KR
},
986 {C_ISO_2022_CN
, CS_ISO_2022_CN
},
987 {C_EUC_CN
, CS_EUC_CN
},
988 {C_GB2312
, CS_GB2312
},
990 {C_EUC_TW
, CS_EUC_TW
},
992 {C_BIG5_HKSCS
, CS_BIG5_HKSCS
},
993 {C_TIS_620
, CS_TIS_620
},
994 {C_WINDOWS_874
, CS_WINDOWS_874
},
995 {C_GEORGIAN_PS
, CS_GEORGIAN_PS
},
996 {C_TCVN5712_1
, CS_TCVN5712_1
},
999 static const struct {
1000 gchar
*const locale
;
1002 CharSet out_charset
;
1003 } locale_table
[] = {
1004 {"ja_JP.eucJP" , C_EUC_JP
, C_ISO_2022_JP
},
1005 {"ja_JP.EUC-JP" , C_EUC_JP
, C_ISO_2022_JP
},
1006 {"ja_JP.EUC" , C_EUC_JP
, C_ISO_2022_JP
},
1007 {"ja_JP.ujis" , C_EUC_JP
, C_ISO_2022_JP
},
1008 {"ja_JP.SJIS" , C_SHIFT_JIS
, C_ISO_2022_JP
},
1009 {"ja_JP.JIS" , C_ISO_2022_JP
, C_ISO_2022_JP
},
1011 {"ja_JP" , C_SHIFT_JIS
, C_ISO_2022_JP
},
1013 {"ja_JP" , C_EUC_JP
, C_ISO_2022_JP
},
1015 {"ko_KR.EUC-KR" , C_EUC_KR
, C_EUC_KR
},
1016 {"ko_KR" , C_EUC_KR
, C_EUC_KR
},
1017 {"zh_CN.GB2312" , C_GB2312
, C_GB2312
},
1018 {"zh_CN.GBK" , C_GBK
, C_GBK
},
1019 {"zh_CN" , C_GB2312
, C_GB2312
},
1020 {"zh_HK" , C_BIG5_HKSCS
, C_BIG5_HKSCS
},
1021 {"zh_TW.eucTW" , C_EUC_TW
, C_BIG5
},
1022 {"zh_TW.EUC-TW" , C_EUC_TW
, C_BIG5
},
1023 {"zh_TW.Big5" , C_BIG5
, C_BIG5
},
1024 {"zh_TW" , C_BIG5
, C_BIG5
},
1026 {"ru_RU.KOI8-R" , C_KOI8_R
, C_KOI8_R
},
1027 {"ru_RU.KOI8R" , C_KOI8_R
, C_KOI8_R
},
1028 {"ru_RU.CP1251" , C_WINDOWS_1251
, C_KOI8_R
},
1029 {"ru_RU" , C_ISO_8859_5
, C_KOI8_R
},
1030 {"tg_TJ" , C_KOI8_T
, C_KOI8_T
},
1031 {"ru_UA" , C_KOI8_U
, C_KOI8_U
},
1032 {"uk_UA.CP1251" , C_WINDOWS_1251
, C_KOI8_U
},
1033 {"uk_UA" , C_KOI8_U
, C_KOI8_U
},
1035 {"be_BY" , C_WINDOWS_1251
, C_WINDOWS_1251
},
1036 {"bg_BG" , C_WINDOWS_1251
, C_WINDOWS_1251
},
1038 {"yi_US" , C_WINDOWS_1255
, C_WINDOWS_1255
},
1040 {"af_ZA" , C_ISO_8859_1
, C_ISO_8859_1
},
1041 {"br_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1042 {"ca_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1043 {"da_DK" , C_ISO_8859_1
, C_ISO_8859_1
},
1044 {"de_AT" , C_ISO_8859_1
, C_ISO_8859_1
},
1045 {"de_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1046 {"de_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1047 {"de_DE" , C_ISO_8859_1
, C_ISO_8859_1
},
1048 {"de_LU" , C_ISO_8859_1
, C_ISO_8859_1
},
1049 {"en_AU" , C_ISO_8859_1
, C_ISO_8859_1
},
1050 {"en_BW" , C_ISO_8859_1
, C_ISO_8859_1
},
1051 {"en_CA" , C_ISO_8859_1
, C_ISO_8859_1
},
1052 {"en_DK" , C_ISO_8859_1
, C_ISO_8859_1
},
1053 {"en_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1054 {"en_HK" , C_ISO_8859_1
, C_ISO_8859_1
},
1055 {"en_IE" , C_ISO_8859_1
, C_ISO_8859_1
},
1056 {"en_NZ" , C_ISO_8859_1
, C_ISO_8859_1
},
1057 {"en_PH" , C_ISO_8859_1
, C_ISO_8859_1
},
1058 {"en_SG" , C_ISO_8859_1
, C_ISO_8859_1
},
1059 {"en_US" , C_ISO_8859_1
, C_ISO_8859_1
},
1060 {"en_ZA" , C_ISO_8859_1
, C_ISO_8859_1
},
1061 {"en_ZW" , C_ISO_8859_1
, C_ISO_8859_1
},
1062 {"es_AR" , C_ISO_8859_1
, C_ISO_8859_1
},
1063 {"es_BO" , C_ISO_8859_1
, C_ISO_8859_1
},
1064 {"es_CL" , C_ISO_8859_1
, C_ISO_8859_1
},
1065 {"es_CO" , C_ISO_8859_1
, C_ISO_8859_1
},
1066 {"es_CR" , C_ISO_8859_1
, C_ISO_8859_1
},
1067 {"es_DO" , C_ISO_8859_1
, C_ISO_8859_1
},
1068 {"es_EC" , C_ISO_8859_1
, C_ISO_8859_1
},
1069 {"es_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1070 {"es_GT" , C_ISO_8859_1
, C_ISO_8859_1
},
1071 {"es_HN" , C_ISO_8859_1
, C_ISO_8859_1
},
1072 {"es_MX" , C_ISO_8859_1
, C_ISO_8859_1
},
1073 {"es_NI" , C_ISO_8859_1
, C_ISO_8859_1
},
1074 {"es_PA" , C_ISO_8859_1
, C_ISO_8859_1
},
1075 {"es_PE" , C_ISO_8859_1
, C_ISO_8859_1
},
1076 {"es_PR" , C_ISO_8859_1
, C_ISO_8859_1
},
1077 {"es_PY" , C_ISO_8859_1
, C_ISO_8859_1
},
1078 {"es_SV" , C_ISO_8859_1
, C_ISO_8859_1
},
1079 {"es_US" , C_ISO_8859_1
, C_ISO_8859_1
},
1080 {"es_UY" , C_ISO_8859_1
, C_ISO_8859_1
},
1081 {"es_VE" , C_ISO_8859_1
, C_ISO_8859_1
},
1082 {"et_EE" , C_ISO_8859_1
, C_ISO_8859_1
},
1083 {"eu_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1084 {"fi_FI" , C_ISO_8859_1
, C_ISO_8859_1
},
1085 {"fo_FO" , C_ISO_8859_1
, C_ISO_8859_1
},
1086 {"fr_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1087 {"fr_CA" , C_ISO_8859_1
, C_ISO_8859_1
},
1088 {"fr_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1089 {"fr_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1090 {"fr_LU" , C_ISO_8859_1
, C_ISO_8859_1
},
1091 {"ga_IE" , C_ISO_8859_1
, C_ISO_8859_1
},
1092 {"gl_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1093 {"gv_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1094 {"id_ID" , C_ISO_8859_1
, C_ISO_8859_1
},
1095 {"is_IS" , C_ISO_8859_1
, C_ISO_8859_1
},
1096 {"it_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1097 {"it_IT" , C_ISO_8859_1
, C_ISO_8859_1
},
1098 {"kl_GL" , C_ISO_8859_1
, C_ISO_8859_1
},
1099 {"kw_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1100 {"ms_MY" , C_ISO_8859_1
, C_ISO_8859_1
},
1101 {"nl_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1102 {"nl_NL" , C_ISO_8859_1
, C_ISO_8859_1
},
1103 {"nn_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1104 {"no_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1105 {"oc_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1106 {"pt_BR" , C_ISO_8859_1
, C_ISO_8859_1
},
1107 {"pt_PT" , C_ISO_8859_1
, C_ISO_8859_1
},
1108 {"sq_AL" , C_ISO_8859_1
, C_ISO_8859_1
},
1109 {"sv_FI" , C_ISO_8859_1
, C_ISO_8859_1
},
1110 {"sv_SE" , C_ISO_8859_1
, C_ISO_8859_1
},
1111 {"tl_PH" , C_ISO_8859_1
, C_ISO_8859_1
},
1112 {"uz_UZ" , C_ISO_8859_1
, C_ISO_8859_1
},
1113 {"wa_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1115 {"bs_BA" , C_ISO_8859_2
, C_ISO_8859_2
},
1116 {"cs_CZ" , C_ISO_8859_2
, C_ISO_8859_2
},
1117 {"hr_HR" , C_ISO_8859_2
, C_ISO_8859_2
},
1118 {"hu_HU" , C_ISO_8859_2
, C_ISO_8859_2
},
1119 {"pl_PL" , C_ISO_8859_2
, C_ISO_8859_2
},
1120 {"ro_RO" , C_ISO_8859_2
, C_ISO_8859_2
},
1121 {"sk_SK" , C_ISO_8859_2
, C_ISO_8859_2
},
1122 {"sl_SI" , C_ISO_8859_2
, C_ISO_8859_2
},
1124 {"sr_YU@cyrillic" , C_ISO_8859_5
, C_ISO_8859_5
},
1125 {"sr_YU" , C_ISO_8859_2
, C_ISO_8859_2
},
1127 {"mt_MT" , C_ISO_8859_3
, C_ISO_8859_3
},
1129 {"lt_LT.iso88594" , C_ISO_8859_4
, C_ISO_8859_4
},
1130 {"lt_LT.ISO8859-4" , C_ISO_8859_4
, C_ISO_8859_4
},
1131 {"lt_LT.ISO_8859-4" , C_ISO_8859_4
, C_ISO_8859_4
},
1132 {"lt_LT" , C_ISO_8859_13
, C_ISO_8859_13
},
1134 {"mk_MK" , C_ISO_8859_5
, C_ISO_8859_5
},
1136 {"ar_AE" , C_ISO_8859_6
, C_ISO_8859_6
},
1137 {"ar_BH" , C_ISO_8859_6
, C_ISO_8859_6
},
1138 {"ar_DZ" , C_ISO_8859_6
, C_ISO_8859_6
},
1139 {"ar_EG" , C_ISO_8859_6
, C_ISO_8859_6
},
1140 {"ar_IQ" , C_ISO_8859_6
, C_ISO_8859_6
},
1141 {"ar_JO" , C_ISO_8859_6
, C_ISO_8859_6
},
1142 {"ar_KW" , C_ISO_8859_6
, C_ISO_8859_6
},
1143 {"ar_LB" , C_ISO_8859_6
, C_ISO_8859_6
},
1144 {"ar_LY" , C_ISO_8859_6
, C_ISO_8859_6
},
1145 {"ar_MA" , C_ISO_8859_6
, C_ISO_8859_6
},
1146 {"ar_OM" , C_ISO_8859_6
, C_ISO_8859_6
},
1147 {"ar_QA" , C_ISO_8859_6
, C_ISO_8859_6
},
1148 {"ar_SA" , C_ISO_8859_6
, C_ISO_8859_6
},
1149 {"ar_SD" , C_ISO_8859_6
, C_ISO_8859_6
},
1150 {"ar_SY" , C_ISO_8859_6
, C_ISO_8859_6
},
1151 {"ar_TN" , C_ISO_8859_6
, C_ISO_8859_6
},
1152 {"ar_YE" , C_ISO_8859_6
, C_ISO_8859_6
},
1154 {"el_GR" , C_ISO_8859_7
, C_ISO_8859_7
},
1155 {"he_IL" , C_ISO_8859_8
, C_ISO_8859_8
},
1156 {"iw_IL" , C_ISO_8859_8
, C_ISO_8859_8
},
1157 {"tr_TR" , C_ISO_8859_9
, C_ISO_8859_9
},
1159 {"lv_LV" , C_ISO_8859_13
, C_ISO_8859_13
},
1160 {"mi_NZ" , C_ISO_8859_13
, C_ISO_8859_13
},
1162 {"cy_GB" , C_ISO_8859_14
, C_ISO_8859_14
},
1164 {"ar_IN" , C_UTF_8
, C_UTF_8
},
1165 {"en_IN" , C_UTF_8
, C_UTF_8
},
1166 {"se_NO" , C_UTF_8
, C_UTF_8
},
1167 {"ta_IN" , C_UTF_8
, C_UTF_8
},
1168 {"te_IN" , C_UTF_8
, C_UTF_8
},
1169 {"ur_PK" , C_UTF_8
, C_UTF_8
},
1171 {"th_TH" , C_TIS_620
, C_TIS_620
},
1172 /* {"th_TH" , C_WINDOWS_874}, */
1173 /* {"th_TH" , C_ISO_8859_11}, */
1175 {"ka_GE" , C_GEORGIAN_PS
, C_GEORGIAN_PS
},
1176 {"vi_VN.TCVN" , C_TCVN5712_1
, C_TCVN5712_1
},
1178 {"C" , C_US_ASCII
, C_US_ASCII
},
1179 {"POSIX" , C_US_ASCII
, C_US_ASCII
},
1180 {"ANSI_X3.4-1968" , C_US_ASCII
, C_US_ASCII
},
1183 static GHashTable
*conv_get_charset_to_str_table(void)
1185 static GHashTable
*table
;
1191 table
= g_hash_table_new(NULL
, g_direct_equal
);
1193 for (i
= 0; i
< sizeof(charsets
) / sizeof(charsets
[0]); i
++) {
1194 if (g_hash_table_lookup(table
, GUINT_TO_POINTER(charsets
[i
].charset
))
1197 (table
, GUINT_TO_POINTER(charsets
[i
].charset
),
1205 static GHashTable
*conv_get_charset_from_str_table(void)
1207 static GHashTable
*table
;
1213 table
= g_hash_table_new(str_case_hash
, str_case_equal
);
1215 for (i
= 0; i
< sizeof(charsets
) / sizeof(charsets
[0]); i
++) {
1216 g_hash_table_insert(table
, charsets
[i
].name
,
1217 GUINT_TO_POINTER(charsets
[i
].charset
));
1223 const gchar
*conv_get_charset_str(CharSet charset
)
1227 table
= conv_get_charset_to_str_table();
1228 return g_hash_table_lookup(table
, GUINT_TO_POINTER(charset
));
1231 CharSet
conv_get_charset_from_str(const gchar
*charset
)
1235 if (!charset
) return C_AUTO
;
1237 table
= conv_get_charset_from_str_table();
1238 return GPOINTER_TO_UINT(g_hash_table_lookup(table
, charset
));
1241 static CharSet
conv_get_locale_charset(void)
1243 static CharSet cur_charset
= -1;
1244 const gchar
*cur_locale
;
1248 if (cur_charset
!= -1)
1251 cur_locale
= conv_get_current_locale();
1253 cur_charset
= C_US_ASCII
;
1257 if (strcasestr(cur_locale
, ".UTF-8") ||
1258 strcasestr(cur_locale
, ".utf8")) {
1259 cur_charset
= C_UTF_8
;
1263 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1264 cur_charset
= C_ISO_8859_15
;
1268 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1271 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1272 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1273 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1274 strlen(locale_table
[i
].locale
))) {
1275 cur_charset
= locale_table
[i
].charset
;
1277 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1278 !strchr(p
+ 1, '.')) {
1279 if (strlen(cur_locale
) == 2 &&
1280 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1281 cur_charset
= locale_table
[i
].charset
;
1287 cur_charset
= C_AUTO
;
1291 static CharSet
conv_get_locale_charset_no_utf8(void)
1293 static CharSet cur_charset
= -1;
1294 const gchar
*cur_locale
;
1299 if (prefs_common
.broken_are_utf8
)
1300 return conv_get_locale_charset();
1302 if (cur_charset
!= -1)
1305 cur_locale
= conv_get_current_locale();
1307 cur_charset
= C_US_ASCII
;
1311 if (strcasestr(cur_locale
, "UTF-8")) {
1312 tmp
= g_strdup(cur_locale
);
1313 *(strcasestr(tmp
, ".UTF-8")) = '\0';
1317 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1318 cur_charset
= C_ISO_8859_15
;
1322 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1325 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1326 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1327 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1328 strlen(locale_table
[i
].locale
))) {
1329 cur_charset
= locale_table
[i
].charset
;
1331 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1332 !strchr(p
+ 1, '.')) {
1333 if (strlen(cur_locale
) == 2 &&
1334 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1335 cur_charset
= locale_table
[i
].charset
;
1341 cur_charset
= C_AUTO
;
1345 const gchar
*conv_get_locale_charset_str(void)
1347 static const gchar
*codeset
= NULL
;
1350 codeset
= conv_get_charset_str(conv_get_locale_charset());
1352 return codeset
? codeset
: CS_INTERNAL
;
1355 const gchar
*conv_get_locale_charset_str_no_utf8(void)
1357 static const gchar
*codeset
= NULL
;
1360 codeset
= conv_get_charset_str(conv_get_locale_charset_no_utf8());
1362 return codeset
? codeset
: CS_INTERNAL
;
1365 static CharSet
conv_get_outgoing_charset(void)
1367 static CharSet out_charset
= -1;
1368 const gchar
*cur_locale
;
1372 if (out_charset
!= -1)
1375 cur_locale
= conv_get_current_locale();
1377 out_charset
= C_AUTO
;
1381 if (strcasestr(cur_locale
, "UTF-8")) {
1382 out_charset
= C_UTF_8
;
1386 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1387 out_charset
= C_ISO_8859_15
;
1391 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1394 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1395 strlen(locale_table
[i
].locale
))) {
1396 out_charset
= locale_table
[i
].out_charset
;
1398 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1399 !strchr(p
+ 1, '.')) {
1400 if (strlen(cur_locale
) == 2 &&
1401 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1402 out_charset
= locale_table
[i
].out_charset
;
1411 const gchar
*conv_get_outgoing_charset_str(void)
1413 CharSet out_charset
;
1416 out_charset
= conv_get_outgoing_charset();
1417 str
= conv_get_charset_str(out_charset
);
1419 return str
? str
: CS_UTF_8
;
1422 const gchar
*conv_get_current_locale(void)
1424 const gchar
*cur_locale
;
1427 cur_locale
= g_win32_getlocale();
1429 cur_locale
= g_getenv("LC_ALL");
1430 if (!cur_locale
) cur_locale
= g_getenv("LC_CTYPE");
1431 if (!cur_locale
) cur_locale
= g_getenv("LANG");
1432 if (!cur_locale
) cur_locale
= setlocale(LC_CTYPE
, NULL
);
1433 #endif /* G_OS_WIN32 */
1435 debug_print("current locale: %s\n",
1436 cur_locale
? cur_locale
: "(none)");
/*
 * conv_is_ja_locale:
 *
 * Tells whether the current locale is a Japanese one.  The answer is
 * kept in the static is_ja_locale; its initial value -1 means that the
 * check has not been made yet, and any other value is returned
 * directly as a boolean (non-zero means TRUE).
 *
 * The check itself compares the first two characters of the name
 * returned by conv_get_current_locale() with ja, ignoring ASCII case.
 */
1441 static gboolean
conv_is_ja_locale(void)
1443 static gint is_ja_locale
= -1;
1444 const gchar
*cur_locale
;
/* already determined on an earlier call */
1446 if (is_ja_locale
!= -1)
1447 return is_ja_locale
!= 0;
1450 cur_locale
= conv_get_current_locale();
1452 if (g_ascii_strncasecmp(cur_locale
, "ja", 2) == 0)
1456 return is_ja_locale
!= 0;
/*
 * conv_unmime_header:
 * @str:              raw header text
 * @default_encoding: charset assumed for non-ASCII bytes in @str,
 *                    or NULL
 *
 * Decodes a header with unmime_header() after bringing its raw bytes
 * into a usable encoding:
 *   - a pure ASCII @str (is_ascii_str()) is decoded as it is;
 *   - with a @default_encoding, @str is first converted from that
 *     encoding to CS_INTERNAL with conv_codeset_strdup() and the
 *     converted copy is decoded;
 *   - otherwise @str is converted into the local buffer buf
 *     (BUFFSIZE bytes) with conv_anytodisp() in a Japanese locale or
 *     with conv_localetodisp() in any other locale, and buf is decoded.
 *
 * Returns: the value produced by unmime_header().
 * NOTE(review): presumably a newly allocated string owned by the
 * caller -- verify against unmime_header().
 */
1459 gchar
*conv_unmime_header(const gchar
*str
, const gchar
*default_encoding
)
1461 gchar buf
[BUFFSIZE
];
/* nothing to convert when the header is plain ASCII */
1463 if (is_ascii_str(str
))
1464 return unmime_header(str
);
1466 if (default_encoding
) {
1469 utf8_buf
= conv_codeset_strdup
1470 (str
, default_encoding
, CS_INTERNAL
);
1474 decoded_str
= unmime_header(utf8_buf
);
/* no usable default encoding: fall back to locale based conversion */
1480 if (conv_is_ja_locale())
1481 conv_anytodisp(buf
, sizeof(buf
), str
);
1483 conv_localetodisp(buf
, sizeof(buf
), str
);
1485 return unmime_header(buf
);
/*
 * Constants and helper macro for the header encoder.
 *
 * MAX_LINELEN is the line length budget the encoder starts from and
 * resets to after a break; MAX_HARD_LINELEN is a second, larger limit.
 * NOTE(review): presumably these reflect the soft limit for lines
 * holding encoded words (RFC 2047) and the hard line limit for mail
 * messages -- confirm the exact values against the RFCs.
 *
 * MIMESEP_BEGIN and MIMESEP_END are the opening and closing delimiters
 * of a MIME encoded word.
 *
 * LBREAK_IF_REQUIRED(cond, is_plain_text) is not self-contained: it
 * reads and updates the locals len, dest, destp, srcp and left of the
 * function that expands it.  It first checks that fewer than
 * MAX_LINELEN + 2 bytes remain in dest.  Then, when cond holds and
 * input is left, it inspects the last written character and, for
 * plain text, the next source character, and resets left to
 * MAX_LINELEN - 1.
 * NOTE(review): the statements executed in each branch are not shown
 * here -- confirm what is written to dest.
 */
1488 #define MAX_LINELEN 76
1489 #define MAX_HARD_LINELEN 996
1490 #define MIMESEP_BEGIN "=?"
1491 #define MIMESEP_END "?="
1493 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1495 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1500 if ((cond) && *srcp) { \
1501 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1502 if (isspace(*(destp - 1))) \
1504 else if (is_plain_text && isspace(*srcp)) \
1509 left = MAX_LINELEN - 1; \
/*
 * conv_encode_header_full:
 * @dest:          output buffer
 * @len:           size of @dest in bytes
 * @src:           header text; must be valid UTF-8, otherwise the
 *                 function returns through g_return_if_fail()
 * @header_len:    number of columns already used on the first line;
 *                 it is subtracted from MAX_LINELEN to get the
 *                 initial budget
 * @addr_field:    when TRUE, parentheses and double quotes are kept
 *                 out of the encoded words
 * @out_encoding_: requested output charset name
 *
 * Writes @src into @dest as a header value in which non-ASCII parts
 * are MIME encoded words and ASCII words are copied unchanged.  Line
 * breaks are inserted through LBREAK_IF_REQUIRED, which works on the
 * locals len, dest, destp, srcp and left of this function.
 *
 * Text to be encoded is converted from CS_INTERNAL to the output
 * charset with conv_codeset_strdup().  The failure path logs a
 * warning, passes the unconverted text through conv_unreadable_8bit()
 * and encodes that instead.  The payload is produced by
 * base64_encode() or qp_q_encode() and emitted as
 * MIMESEP_BEGIN charset separator payload MIMESEP_END.
 *
 * NOTE(review): presumably conv_get_outgoing_charset_str() is used
 * only when @out_encoding_ is NULL, and use_base64 follows the same
 * MB_CUR_MAX test as the separator -- confirm.
 */
1515 void conv_encode_header_full(gchar
*dest
, gint len
, const gchar
*src
,
1516 gint header_len
, gboolean addr_field
,
1517 const gchar
*out_encoding_
)
1519 const gchar
*cur_encoding
;
1520 const gchar
*out_encoding
;
1524 const guchar
*srcp
= src
;
1525 guchar
*destp
= dest
;
1526 gboolean use_base64
;
/* reject input that is not valid UTF-8 */
1528 g_return_if_fail(g_utf8_validate(src
, -1, NULL
) == TRUE
);
/* multibyte locales get the B separator, all others the Q separator */
1530 if (MB_CUR_MAX
> 1) {
1532 mimesep_enc
= "?B?";
1535 mimesep_enc
= "?Q?";
1538 cur_encoding
= CS_INTERNAL
;
1541 out_encoding
= out_encoding_
;
1543 out_encoding
= conv_get_outgoing_charset_str();
/* CS_US_ASCII is replaced by CS_ISO_8859_1 as the announced charset */
1545 if (!strcmp(out_encoding
, CS_US_ASCII
))
1546 out_encoding
= CS_ISO_8859_1
;
/* fixed overhead of one encoded word: both delimiters, charset, separator */
1548 mimestr_len
= strlen(MIMESEP_BEGIN
) + strlen(out_encoding
) +
1549 strlen(mimesep_enc
) + strlen(MIMESEP_END
);
/* columns still available on the first line */
1551 left
= MAX_LINELEN
- header_len
;
1554 LBREAK_IF_REQUIRED(left
<= 0, TRUE
);
1556 while (isspace(*srcp
)) {
1559 LBREAK_IF_REQUIRED(left
<= 0, TRUE
);
1562 /* output as it is if the next word is ASCII string */
1563 if (!is_next_nonascii(srcp
)) {
1566 word_len
= get_next_word_len(srcp
);
1567 LBREAK_IF_REQUIRED(left
< word_len
, TRUE
);
1568 while (word_len
> 0) {
1569 LBREAK_IF_REQUIRED(left
+ (MAX_HARD_LINELEN
- MAX_LINELEN
) <= 0, TRUE
)
1578 /* don't include parentheses and quotes in encoded strings */
1579 if (addr_field
&& (*srcp
== '(' || *srcp
== ')' || *srcp
== '"')) {
1580 LBREAK_IF_REQUIRED(left
< 2, FALSE
);
1591 const guchar
*p
= srcp
;
1593 gint out_enc_str_len
;
1594 gint mime_block_len
;
1595 gboolean cont
= FALSE
;
1597 while (*p
!= '\0') {
1598 if (isspace(*p
) && !is_next_nonascii(p
+ 1))
1600 /* don't include parentheses in encoded
1602 if (addr_field
&& (*p
== '(' || *p
== ')' || *p
== '"'))
1605 mb_len
= g_utf8_skip
[*p
];
1607 Xstrndup_a(part_str
, srcp
, cur_len
+ mb_len
, );
1608 out_str
= conv_codeset_strdup
1609 (part_str
, cur_encoding
, out_encoding
);
1615 g_warning("conv_encode_header(): code conversion failed\n");
1616 conv_unreadable_8bit(part_str
);
1617 out_str
= g_strdup(part_str
);
1620 out_str_len
= strlen(out_str
);
1623 out_enc_str_len
= B64LEN(out_str_len
);
1626 qp_get_q_encoding_len(out_str
);
1630 if (mimestr_len
+ out_enc_str_len
<= left
) {
1633 } else if (cur_len
== 0) {
1634 LBREAK_IF_REQUIRED(1, FALSE
);
1643 Xstrndup_a(part_str
, srcp
, cur_len
, );
1644 out_str
= conv_codeset_strdup
1645 (part_str
, cur_encoding
, out_encoding
);
1647 g_warning("conv_encode_header(): code conversion failed\n");
1648 conv_unreadable_8bit(part_str
);
1649 out_str
= g_strdup(part_str
);
1651 out_str_len
= strlen(out_str
);
1654 out_enc_str_len
= B64LEN(out_str_len
);
1657 qp_get_q_encoding_len(out_str
);
1659 Xalloca(enc_str
, out_enc_str_len
+ 1, );
1661 base64_encode(enc_str
, out_str
, out_str_len
);
1663 qp_q_encode(enc_str
, out_str
);
1667 /* output MIME-encoded string block */
1668 mime_block_len
= mimestr_len
+ strlen(enc_str
);
1669 g_snprintf(destp
, mime_block_len
+ 1,
1670 MIMESEP_BEGIN
"%s%s%s" MIMESEP_END
,
1671 out_encoding
, mimesep_enc
, enc_str
);
1672 destp
+= mime_block_len
;
1675 left
-= mime_block_len
;
1678 LBREAK_IF_REQUIRED(cont
, FALSE
);
/*
 * conv_encode_header:
 * @dest:       output buffer
 * @len:        size of @dest
 * @src:        header text to encode
 * @header_len: length of the header already present on the line
 * @addr_field: TRUE when the header is an address field
 *
 * Convenience wrapper: forwards all arguments unchanged to
 * conv_encode_header_full() and passes NULL as the output encoding.
 */
1688 void conv_encode_header(gchar
*dest
, gint len
, const gchar
*src
,
1689 gint header_len
, gboolean addr_field
)
1691 conv_encode_header_full(dest
,len
,src
,header_len
,addr_field
,NULL
);
1694 #undef LBREAK_IF_REQUIRED
/*
 * conv_filename_from_utf8:
 * @utf8_file: file name in UTF-8
 *
 * Converts @utf8_file to the file name encoding with
 * g_filename_from_utf8().  A conversion error is reported with
 * g_warning() and the GError is released with g_error_free().  The
 * fallback result is a plain copy of @utf8_file made with g_strdup().
 *
 * NOTE(review): presumably the copy is used only when the conversion
 * produced no result, and the returned string is owned by the
 * caller -- confirm.
 */
1695 gchar
*conv_filename_from_utf8(const gchar
*utf8_file
)
1698 GError
*error
= NULL
;
1700 fs_file
= g_filename_from_utf8(utf8_file
, -1, NULL
, NULL
, &error
);
1702 g_warning("failed to convert encoding of file name: %s\n",
1704 g_error_free(error
);
1707 fs_file
= g_strdup(utf8_file
);
1712 gchar
*conv_filename_to_utf8(const gchar
*fs_file
)
1714 gchar
*utf8_file
= NULL
;
1715 GError
*error
= NULL
;
1717 utf8_file
= g_filename_to_utf8(fs_file
, -1, NULL
, NULL
, &error
);
1719 g_warning("failed to convert encoding of file name: %s\n",
1721 g_error_free(error
);
1724 if (!utf8_file
|| !g_utf8_validate(utf8_file
, -1, NULL
)) {
1726 utf8_file
= g_strdup(fs_file
);
1727 conv_unreadable_8bit(utf8_file
);