2 * ========================================================================
3 * Copyright 2013-2022 Eduardo Chappa
4 * Copyright 2006-2008 University of Washington
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * ========================================================================
15 #include "../pith/headers.h"
16 #include "../pith/charset.h"
17 #include "../pith/state.h"
18 #include "../pith/conf.h"
19 #include "../pith/escapes.h"
20 #include "../pith/mimedesc.h"
21 #include "../pith/filter.h"
22 #include "../pith/string.h"
23 #include "../pith/options.h"
29 int rfc1522_token(char *, int (*)(int), char *, char **);
30 int rfc1522_valtok(int);
31 int rfc1522_valenc(int);
32 int rfc1522_valid(char *, char **, char **, char **, char **);
33 void rfc1522_copy_and_transliterate(unsigned char *, unsigned char **, size_t,
34 unsigned char *, unsigned long, char *);
35 unsigned char *rfc1522_encoded_word(unsigned char *, int, char *);
36 char *rfc1522_8bit(void *, int);
37 char *rfc1522_binary(void *, int);
41 body_charset(MAILSTREAM
*stream
, long int msgno
, unsigned char *section
)
47 if((body
= mail_body(stream
, msgno
, section
)) && body
->type
== TYPETEXT
){
48 if(!(charset
= parameter_val(body
->parameter
, "charset")))
49 charset
= cpystr("US-ASCII");
59 * Copies the source string into allocated space with the 8-bit EUC codes
60 * (on Unix) or the Shift-JIS (on PC) converted into ISO-2022-JP.
61 * Caller is responsible for freeing the result.
64 trans_euc_to_2022_jp(unsigned char *src
)
67 unsigned char *rv
, *p
, *q
;
68 int inside_esc_seq
= 0;
69 int c1
= -1; /* remembers first of pair for Shift-JIS */
74 len
= strlen((char *) src
);
77 * Worst possible increase is every other character an 8-bit character.
78 * In that case, each of those gets 6 extra characters for the escape
79 * sequences. We're not too concerned about the extra length because
80 * these are relatively short strings.
82 alloc
= len
+ 1 + ((len
+1)/2) * 6;
83 rv
= (unsigned char *) fs_get(alloc
* sizeof(char));
85 for(p
= src
, q
= rv
; *p
; p
++){
87 if(c1
>= 0){ /* second of a pair? */
88 int adjust
= *p
< 159;
89 int rowOffset
= c1
< 160 ? 112 : 176;
90 int cellOffset
= adjust
? (*p
> 127 ? 32 : 31) : 126;
92 *q
++ = ((c1
- rowOffset
) << 1) - adjust
;
93 *q
++ = *p
- cellOffset
;
135 * * * * * * * * * RFC 1522 support routines * * * * * * * *
137 * RFC 1522 support is *very* loosely based on code contributed
138 * by Lars-Erik Johansson <lej@cdg.chalmers.se>. Thanks to Lars-Erik,
139 * and apologies for taking such liberties with his code.
/*
 * Pieces of an RFC 1522 encoded-word and the length of each piece.
 * RFC1522_INIT opens the word, RFC1522_DLIM separates its fields and
 * RFC1522_TERM closes it.
 */
#define	RFC1522_INIT	"=?"
#define	RFC1522_INIT_L	2
#define	RFC1522_TERM	"?="
#define	RFC1522_TERM_L	2
#define	RFC1522_DLIM	"?"
#define	RFC1522_DLIM_L	1
#define	RFC1522_MAXW	75	/* RFCs say 75, but no senders seem to care */
#define	ESPECIALS	"()<>@,;:\"/[]?.="

/*
 * RFC1522_OVERHEAD - count of characters in an encoded-word that are not
 *		      encoded text: the intro, the terminator, two
 *		      delimiters, the charset name S and one more character
 *		      (presumably the single encoding letter).
 *
 * S must be a NUL-terminated string; it is evaluated once.  Because of
 * strlen() the result has type size_t.
 *
 * The expansion is a parenthesized expression with no trailing ';' so the
 * macro can be used inside larger expressions and conditions.
 */
#define	RFC1522_OVERHEAD(S)	(RFC1522_INIT_L + RFC1522_TERM_L +	\
				 (2 * RFC1522_DLIM_L) + strlen(S) + 1)
152 #define RFC1522_ENC_CHAR(C) (((C) & 0x80) || !rfc1522_valtok(C) \
156 * rfc1522_decode_to_utf8 - try to decode the given source string ala RFC 2047
157 * (which obsoletes RFC 1522) into the given destination buffer,
160 * How large should d be? The decoded string of octets will fit in
161 * the same size string as the source string. However, because we're
162 * translating that into UTF-8 the result may expand. Currently the
163 * Thai character set has single octet characters which expand to
164 * three octets in UTF-8. So it would be safe to use 3 * strlen(s)
165 * for the size of d. One can imagine a currently non-existent
166 * character set that expanded to 4 octets instead, so use 4 to be
169 * Returns: pointer to either the destination buffer containing the
170 * decoded text, or a pointer to the source buffer if there was
171 * no valid 'encoded-word' found during scanning.
174 rfc1522_decode_to_utf8(unsigned char *d
, size_t len
, char *s
)
176 unsigned char *rv
= NULL
, *p
;
177 char *start
= s
, *sw
, *enc
, *txt
, *ew
, **q
, *lang
;
182 *d
= '\0'; /* init destination */
184 while(s
&& (sw
= strstr(s
, RFC1522_INIT
))){
185 if(!rv
) /* there's something to do, init it */
187 /* validate the rest of the encoded-word */
188 if(rfc1522_valid(sw
, &cset
, &enc
, &txt
, &ew
)){
190 * We may have been putting off copying the first part of the
191 * source while waiting to see if we have to copy at all.
193 if(rv
== d
&& s
!= start
){
194 rfc1522_copy_and_transliterate(rv
, &d
, len
, (unsigned char *) start
,
199 /* copy everything between s and sw to destination */
200 for(i
= 0; &s
[i
] < sw
; i
++)
201 if(!isspace((unsigned char)s
[i
])){ /* if some non-whitespace */
202 while(s
< sw
&& d
-rv
<len
-1)
203 *d
++ = (unsigned char) *s
++;
208 enc
[-1] = txt
[-1] = ew
[0] = '\0'; /* tie off token strings */
210 if((lang
= strchr(cset
, '*')) != NULL
)
213 /* based on encoding, write the encoded text to output buffer */
215 case 'Q' : /* 'Q' encoding */
217 /* special hocus-pocus to deal with '_' exception, too bad */
218 for(l
= 0L, i
= 0; txt
[l
]; l
++)
223 q
= (char **) fs_get((i
+ 1) * sizeof(char *));
224 for(l
= 0L, i
= 0; txt
[l
]; l
++)
235 if((p
= rfc822_qprint((unsigned char *)txt
, strlen(txt
), &l
)) != NULL
){
236 rfc1522_copy_and_transliterate(rv
, &d
, len
, p
, l
, cset
);
237 fs_give((void **)&p
); /* free encoded buf */
241 fs_give((void **) &q
);
246 if(q
){ /* restore underscores */
247 for(i
= 0; q
[i
]; i
++)
250 fs_give((void **)&q
);
255 case 'B' : /* 'B' encoding */
257 if((p
= rfc822_base64((unsigned char *) txt
, strlen(txt
), &l
)) != NULL
){
258 rfc1522_copy_and_transliterate(rv
, &d
, len
, p
, l
, cset
);
259 fs_give((void **)&p
); /* free encoded buf */
267 rfc1522_copy_and_transliterate(rv
, &d
, len
, (unsigned char *) txt
,
269 dprint((1, "RFC1522_decode: Unknown ENCODING: %s\n",
274 /* restore trompled source string */
275 enc
[-1] = txt
[-1] = '?';
276 ew
[0] = RFC1522_TERM
[0];
278 /* advance s to start of text after encoded-word */
279 s
= ew
+ RFC1522_TERM_L
;
286 * Found intro, but bogus data followed, treat it as normal text.
288 l
= (sw
- s
) + RFC1522_INIT_L
;
289 rfc1522_copy_and_transliterate(rv
, &d
, len
, (unsigned char *) s
, l
, NULL
);
290 for(; isspace((unsigned char) *(s
+l
)) && d
-rv
<len
-1;l
++)
291 *d
++ = *(s
+l
); /* copy any trailing space */
299 if(s
&& *s
){ /* copy remaining text */
300 rfc1522_copy_and_transliterate(rv
, &d
, len
, (unsigned char *) s
, strlen(s
), NULL
);
306 rfc1522_copy_and_transliterate(rv
, &d
, len
, (unsigned char *) s
, strlen(s
), NULL
);
310 return(rv
? rv
: (unsigned char *) start
);
313 dprint((1, "RFC1522_decode: BOGUS INPUT: -->%s<--\n",
314 start
? start
: "?"));
315 return((unsigned char *) start
);
320 * rfc1522_token - scan the given source line up to the end_str making
321 * sure all subsequent chars are "valid" leaving endp
322 * at the start of the end_str.
323 * Returns: TRUE if we got a valid token, FALSE otherwise
326 rfc1522_token(char *s
, int (*valid
) (int), char *end_str
, char **endp
)
329 if((char) *s
== *end_str
/* test for matching end_str */
331 ? !strncmp((char *)s
+ 1, end_str
+ 1, strlen(end_str
+ 1))
337 if(!(*valid
)(*s
++)) /* test for valid char */
346 * rfc1522_valtok - test for valid character in the RFC 1522 encoded
347 * word's charset and encoding fields.
350 rfc1522_valtok(int c
)
/*
 * Non-zero when c is acceptable: it is not SPACE, its low seven bits do
 * not form a control character, and strindex() does not report it among
 * the ESPECIALS characters (strindex is defined elsewhere; presumably it
 * returns non-NULL when c occurs in the string).
 */
352 return(!(c
== SPACE
|| iscntrl(c
& 0x7f) || strindex(ESPECIALS
, c
)));
357 * rfc1522_valenc - test for valid character in the RFC 1522 encoded
358 * word's encoded-text field.
361 rfc1522_valenc(int c
)
/*
 * Non-zero for any printable character other than '?' and SPACE.  The
 * cast to unsigned char keeps the isprint() argument in the range the
 * ctype functions are defined for.
 */
363 return(!(c
== '?' || c
== SPACE
) && isprint((unsigned char)c
));
368 * rfc1522_valid - validate the given string as to its rfc1522-ness
371 rfc1522_valid(char *s
, char **charset
, char **enc
, char **txt
, char **endp
)
376 rv
= rfc1522_token(c
= s
+RFC1522_INIT_L
, rfc1522_valtok
, RFC1522_DLIM
, &e
)
377 && rfc1522_token(++e
, rfc1522_valtok
, RFC1522_DLIM
, &t
)
378 && rfc1522_token(++t
, rfc1522_valenc
, RFC1522_TERM
, &p
);
397 * rfc1522_copy_and_transliterate - copy given buf to destination buffer
398 * as UTF-8 characters
401 rfc1522_copy_and_transliterate(unsigned char *rv
,
413 memset(&xsrc
, 0, sizeof(SIZEDTEXT
));
415 /* transliterate decoded segment to utf-8 */
417 if(strucmp((char *) cset
, "us-ascii")
418 && strucmp((char *) cset
, "utf-8")){
419 if(utf8_charset(cset
)){
420 if(!utf8_text(&src
, cset
, &xsrc
, 0L)){
421 /* should not happen */
422 alpine_panic("c-client failed to transliterate recognized characterset");
426 /* non-xlatable charset */
427 for(i
= 0; i
< l
; i
++)
428 if(src
.data
[i
] & 0x80){
429 xsrc
.data
= (unsigned char *) fs_get((l
+1) * sizeof(unsigned char));
431 for(i
= 0; i
< l
; i
++)
432 xsrc
.data
[i
] = (src
.data
[i
] & 0x80) ? '?' : src
.data
[i
];
443 src
.size
= strlen((char *) s
);
445 if((cs
= utf8_infercharset(&src
))){
446 if(!(cs
->type
== CT_ASCII
|| cs
->type
== CT_UTF8
)){
447 if(!utf8_text_cs(&src
, cs
, &xsrc
, 0L, 0L)){
448 /* should not happen */
449 alpine_panic("c-client failed to transliterate recognized characterset");
453 else if((cset
=ps_global
->VAR_UNK_CHAR_SET
)
454 && strucmp((char *) cset
, "us-ascii")
455 && strucmp((char *) cset
, "utf-8")
456 && utf8_charset(cset
)){
457 if(!utf8_text(&src
, cset
, &xsrc
, 0L)){
458 /* should not happen */
459 alpine_panic("c-client failed to transliterate recognized character set");
463 /* unknown bytes - mask off high bit chars */
464 for(i
= 0; i
< l
; i
++)
465 if(src
.data
[i
] & 0x80){
466 xsrc
.data
= (unsigned char *) fs_get((l
+1) * sizeof(unsigned char));
468 for(i
= 0; i
< l
; i
++)
469 xsrc
.data
[i
] = (src
.data
[i
] & 0x80) ? '?' : src
.data
[i
];
481 i
= MIN(l
,len
-1-((*d
)-rv
));
482 strncpy((char *) (*d
), (char *) s
, i
);
484 *d
+= l
; /* advance dest ptr to EOL */
488 if(xsrc
.data
&& src
.data
!= xsrc
.data
)
489 fs_give((void **) &xsrc
.data
);
495 * rfc1522_encode - encode the given source string ala RFC 1522,
496 * IF NECESSARY, into the given destination buffer.
497 * Don't bother copying if it turns out encoding
500 * Returns: pointer to either the destination buffer containing the
501 * encoded text, or a pointer to the source buffer if we didn't
502 * have to encode anything.
505 rfc1522_encode(char *d
, size_t dlen
, unsigned char *s
, char *charset
)
507 unsigned char *p
, *q
;
514 charset
= UNKNOWN_CHARSET
;
516 /* look for a reason to encode */
517 for(p
= s
, n
= 0; *p
; p
++)
521 else if(*p
== RFC1522_INIT
[0]
522 && !strncmp((char *) p
, RFC1522_INIT
, RFC1522_INIT_L
)){
523 if(rfc1522_valid((char *) p
, NULL
, NULL
, NULL
, (char **) &q
))
524 p
= q
+ RFC1522_TERM_L
- 1; /* advance past encoded gunk */
526 else if(*p
== ESCAPE
&& match_escapes((char *)(p
+1))){
530 if(n
){ /* found, encoding to do */
532 enc
= (n
> (2 * (p
- s
)) / 3) ? 'B' : 'Q';
535 if(d
-rv
< dlen
-1-(RFC1522_INIT_L
+2*RFC1522_DLIM_L
+1)){
536 sstrncpy(&d
, RFC1522_INIT
, dlen
-(d
-rv
)); /* insert intro header, */
537 sstrncpy(&d
, charset
, dlen
-(d
-rv
)); /* character set tag, */
538 sstrncpy(&d
, RFC1522_DLIM
, dlen
-(d
-rv
)); /* and encoding flavor */
542 sstrncpy(&d
, RFC1522_DLIM
, dlen
-(d
-rv
));
546 * feed lines to encoder such that they're guaranteed
547 * less than RFC1522_MAXW.
549 p
= rfc1522_encoded_word(s
, enc
, charset
);
550 if(enc
== 'B') /* insert encoded data */
551 sstrncpy(&d
, t
= rfc1522_binary(s
, p
- s
), dlen
-1-(d
-rv
));
552 else /* 'Q' encoding */
553 sstrncpy(&d
, t
= rfc1522_8bit(s
, p
- s
), dlen
-1-(d
-rv
));
555 sstrncpy(&d
, RFC1522_TERM
, dlen
-1-(d
-rv
)); /* insert terminator */
556 fs_give((void **) &t
);
557 if(*p
) /* more src string follows */
558 sstrncpy(&d
, "\015\012 ", dlen
-1-(d
-rv
)); /* insert cont. line */
560 s
= p
; /* advance s */
567 return((char *) s
); /* no work for us here */
573 * rfc1522_encoded_word -- cut given string into max length encoded word
575 * Return: pointer into 's' such that the encoded 's' is no greater
578 * NOTE: this line break code is NOT cognizant of any SI/SO
579 * charset requirements nor similar strategies using escape
580 * codes. Hopefully this will matter little and such
581 * representation strategies don't also include 8bit chars.
584 rfc1522_encoded_word(unsigned char *s
, int enc
, char *charset
)
586 int goal
= RFC1522_MAXW
- RFC1522_OVERHEAD(charset
);
588 if(enc
== 'B') /* base64 encode */
589 for(goal
= ((goal
/ 4) * 3) - 2; goal
&& *s
; goal
--, s
++)
591 else /* special 'Q' encoding */
592 if(!strucmp(charset
, "UTF-8")){ /* special handling for utf-8 */
595 for(; goal
&& *s
; s
++){
601 : *s
< 0xfe ? 5 : -1;
602 if(more
>= 0){ /* check that we have at least more characters */
603 for(p
= s
, i
= 0; i
<= more
&& *p
!= '\0'; i
++, p
++)
604 goal
-= RFC1522_ENC_CHAR(*p
) ? 3 : 1;
605 if(goal
< 0) /* does not fit in encoded word */
607 s
+= i
- 1; /* i - 1 should be equal to more */
609 else /* encode it, and skip it */
610 if((goal
-= RFC1522_ENC_CHAR(*s
) ? 3 : 1) < 0)
615 for(; goal
&& *s
; s
++)
616 if((goal
-= RFC1522_ENC_CHAR(*s
) ? 3 : 1) < 0)
625 * rfc1522_8bit -- apply RFC 1522 'Q' encoding to the given 8bit buffer
627 * Return: alloc'd buffer containing encoded string
630 rfc1522_8bit(void *src
, int slen
)
632 char *ret
= (char *) fs_get ((size_t) (3*slen
+ 2));
635 unsigned char *s
= (unsigned char *) src
;
637 while (slen
--) { /* for each character */
638 if (((c
= *s
++) == '\015') && (*s
== '\012') && slen
) {
639 *d
++ = '\015'; /* true line break */
643 else if(c
== SPACE
){ /* special encoding case */
646 else if(RFC1522_ENC_CHAR(c
)){
647 *d
++ = '='; /* quote character */
651 *d
++ = (char) c
; /* ordinary character */
654 *d
= '\0'; /* tie off destination */
660 * rfc1522_binary -- apply RFC 1522 'B' encoding to the given 8bit buffer
662 * Return: alloc'd buffer containing encoded string
665 rfc1522_binary (void *src
, int srcl
)
668 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
669 unsigned char *s
= (unsigned char *) src
;
672 d
= ret
= (char *) fs_get ((size_t) ((((srcl
+ 2) / 3) * 4) + 1));
673 for (; srcl
; s
+= 3) { /* process tuplets */
674 /* byte 1: high 6 bits (1) */
676 /* byte 2: low 2 bits (1), high 4 bits (2) */
677 *d
++ = v
[((s
[0] << 4) + (--srcl
? (s
[1] >> 4) : 0)) & 0x3f];
678 /* byte 3: low 4 bits (2), high 2 bits (3) */
679 *d
++ = srcl
? v
[((s
[1] << 2) + (--srcl
? (s
[2] >> 6) :0)) & 0x3f] :'=';
680 /* byte 4: low 6 bits (3) */
681 *d
++ = srcl
? v
[s
[2] & 0x3f] : '=';
683 srcl
--; /* count third character if processed */
686 *d
= '\0'; /* tie off string */
687 return(ret
); /* return the resulting string */
692 * Checks if charset conversion is possible and which quality could be achieved
694 * args: from_cs -- charset to convert from
695 * to_cs -- charset to convert to
698 * CONV_TABLE->table -- conversion table, NULL if conversion not needed
700 * CONV_TABLE->quality -- conversion quality (conversion not supported, not
701 * needed, loses special chars, or loses letters
703 * The other entries of CONV_TABLE are used inside this function only
704 * and may not be used outside unless this documentation is updated.
707 conversion_table(char *from_cs
, char *to_cs
)
710 unsigned char *p
= NULL
;
711 unsigned short *fromtab
, *totab
;
712 CONV_TABLE
*ct
= NULL
;
713 const CHARSET
*from
, *to
;
714 static CONV_TABLE null_tab
;
716 if(!(from_cs
&& *from_cs
&& to_cs
&& *to_cs
) || !strucmp(from_cs
, to_cs
)){
717 memset(&null_tab
, 0, sizeof(null_tab
));
718 null_tab
.quality
= CV_NO_TRANSLATE_NEEDED
;
723 * First check to see if we are already set up for this pair of charsets.
725 if((ct
= ps_global
->conv_table
) != NULL
726 && ct
->from_charset
&& ct
->to_charset
727 && !strucmp(ct
->from_charset
, from_cs
)
728 && !strucmp(ct
->to_charset
, to_cs
))
732 * No such luck. Get rid of the cache of the previous translation table
733 * and build a new one.
736 if(ct
->table
&& (ct
->convert
!= gf_convert_utf8_charset
))
737 fs_give((void **) &ct
->table
);
740 fs_give((void **) &ct
->from_charset
);
743 fs_give((void **) &ct
->to_charset
);
746 ct
= ps_global
->conv_table
= (CONV_TABLE
*) fs_get(sizeof(*ct
));
748 memset(ct
, 0, sizeof(*ct
));
750 ct
->from_charset
= cpystr(from_cs
);
751 ct
->to_charset
= cpystr(to_cs
);
752 ct
->quality
= CV_NO_TRANSLATE_POSSIBLE
;
755 * Check to see if a translation is feasible.
757 from
= utf8_charset(from_cs
);
758 to
= utf8_charset(to_cs
);
760 if(from
&& to
){ /* if both charsets found */
761 /* no mapping if same or from is ASCII */
762 if((from
->type
== to
->type
&& from
->tab
== to
->tab
)
763 || (from
->type
== CT_ASCII
))
764 ct
->quality
= CV_NO_TRANSLATE_NEEDED
;
765 else switch(from
->type
){
766 case CT_1BYTE0
: /* 1 byte no table */
767 case CT_1BYTE
: /* 1 byte ASCII + table 0x80-0xff */
768 case CT_1BYTE8
: /* 1 byte table 0x00 - 0xff */
770 case CT_1BYTE0
: /* 1 byte no table */
771 case CT_1BYTE
: /* 1 byte ASCII + table 0x80-0xff */
772 case CT_1BYTE8
: /* 1 byte table 0x00 - 0xff */
773 ct
->quality
= (from
->script
& to
->script
) ?
774 CV_LOSES_SOME_LETTERS
: CV_LOSES_SPECIAL_CHARS
;
778 case CT_UTF8
: /* variable UTF-8 encoded Unicode no table */
779 /* If source is UTF-8, see if destination charset has an 8 or 16 bit
780 * coded character set that we can translate to. By special
781 * dispensation, kludge ISO-2022-JP to EUC or Shift-JIS, but don't
782 * try to do any other ISO 2022 charsets or UTF-7.
785 case CT_SJIS
: /* 2 byte Shift-JIS */
786 /* only win if can get EUC-JP chartab */
787 if(utf8_charset("EUC-JP"))
788 ct
->quality
= CV_LOSES_SOME_LETTERS
;
790 case CT_ASCII
: /* 7-bit ASCII no table */
791 case CT_1BYTE0
: /* 1 byte no table */
792 case CT_1BYTE
: /* 1 byte ASCII + table 0x80-0xff */
793 case CT_1BYTE8
: /* 1 byte table 0x00 - 0xff */
794 case CT_EUC
: /* 2 byte ASCII + utf8_eucparam base/CS2/CS3 */
795 case CT_DBYTE
: /* 2 byte ASCII + utf8_eucparam */
796 case CT_DBYTE2
: /* 2 byte ASCII + utf8_eucparam plane1/2 */
797 ct
->quality
= CV_LOSES_SOME_LETTERS
;
803 switch (ct
->quality
) { /* need to map? */
804 case CV_NO_TRANSLATE_POSSIBLE
:
805 case CV_NO_TRANSLATE_NEEDED
:
806 break; /* no mapping needed */
807 default: /* do mapping */
808 switch (from
->type
) {
809 case CT_UTF8
: /* UTF-8 to legacy character set */
810 if((ct
->table
= utf8_rmap (to_cs
)) != NULL
)
811 ct
->convert
= gf_convert_utf8_charset
;
814 case CT_1BYTE0
: /* ISO 8859-1 */
815 case CT_1BYTE
: /* low part ASCII, high part other */
816 case CT_1BYTE8
: /* low part has some non-ASCII */
818 * The fromtab and totab tables are mappings from the 128 character
819 * positions 128-255 to their Unicode values (so unsigned shorts).
820 * The table we are creating is such that if
822 * from_char_value -> unicode_value
823 * to_char_value -> same_unicode_value
825 * then we want to map from_char_value -> to_char_value
827 * To simplify conversions we create the whole 256 element array,
828 * with the first 128 positions just the identity. If there is no
829 * conversion for a particular from_char_value (that is, no
830 * to_char_value maps to the same unicode character) then we put
831 * '?' in that character. We may want to output blob on the PC,
834 * If fromtab or totab are NULL, that means the mapping is simply
835 * the identity mapping. Since that is still useful to us, we
836 * create it on the fly.
838 fromtab
= (unsigned short *) from
->tab
;
839 totab
= (unsigned short *) to
->tab
;
841 ct
->convert
= gf_convert_8bit_charset
;
842 p
= ct
->table
= (unsigned char *)
843 fs_get(256 * sizeof(unsigned char));
844 for(i
= 0; i
< 256; i
++){
847 switch(from
->type
){ /* get "from" UCS-2 codepoint */
848 case CT_1BYTE0
: /* ISO 8859-1 */
851 case CT_1BYTE
: /* low part ASCII, high part other */
852 fc
= (i
< 128) ? i
: fromtab
[i
-128];
854 case CT_1BYTE8
: /* low part has some non-ASCII */
858 switch(to
->type
){ /* match against "to" UCS-2 codepoint */
859 case CT_1BYTE0
: /* identity match for ISO 8859-1*/
863 case CT_1BYTE
: /* ASCII is identity, search high part */
864 if(fc
< 128) p
[i
] = fc
;
865 else for(j
= 0; j
< 128; j
++){
872 case CT_1BYTE8
: /* search all codepoints */
873 for(j
= 0; j
< 256; j
++){
892 * Replace personal names in list of addresses with
893 * decoded personal names in UTF-8.
894 * Assumes we can free and reallocate the name.
897 decode_addr_names_to_utf8(struct mail_address
*a
)
/* walk the address list through each entry's next pointer */
899 for(; a
; a
= a
->next
)
/*
 * The address of the personal field is passed so the callee can replace
 * the string it points to.
 * NOTE(review): the statement between the loop head and this call is not
 * visible here; confirm whether a NULL personal name is filtered first.
 */
901 convert_possibly_encoded_str_to_utf8(&a
->personal
);
906 * Strp is a pointer to an allocated string.
907 * This routine will convert the string to UTF-8, possibly
908 * freeing and re-allocating it.
909 * The source string may or may not have RFC1522 encoding
910 * which will be undone using rfc1522_decode.
911 * The string will have been converted on return.
914 convert_possibly_encoded_str_to_utf8(char **strp
)
916 size_t len
, lensrc
, lenresult
;
917 char *bufp
, *decoded
;
/* nothing to convert for a NULL pointer, a NULL string or an empty string */
919 if(!strp
|| !*strp
|| **strp
== '\0')
/*
 * Scratch buffer of four times the source length plus one for the
 * terminator, following the sizing advice given with
 * rfc1522_decode_to_utf8.
 */
922 len
= 4 * strlen(*strp
) + 1;
923 bufp
= (char *) fs_get(len
);
925 decoded
= (char *) rfc1522_decode_to_utf8((unsigned char *) bufp
, len
, *strp
);
926 if(decoded
!= (*strp
)){ /* decoder returned new text rather than the source pointer */
/* result is no longer than the source: overwrite the caller's string in place */
927 if((lensrc
=strlen(*strp
)) >= (lenresult
=strlen(decoded
))){
928 strncpy(*strp
, decoded
, lensrc
);
929 (*strp
)[lensrc
] = '\0';
/* result is longer: release the caller's string before replacing it */
932 fs_give((void **) strp
);
933 if(decoded
== bufp
){ /* this will be true */
/* shrink the scratch buffer to exactly fit the decoded text */
934 fs_resize((void **) &bufp
, lenresult
+1);
938 else{ /* this is unreachable */
939 *strp
= cpystr(decoded
);
943 /* else, already UTF-8 */
946 fs_give((void **) &bufp
);