1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047;
3 *@ for _header() versions: including "encoded word" as of RFC 2047):
4 *@ - Quoted-Printable, section 6.7
5 *@ - Base64, section 6.8
6 *@ TODO We have no notion of a "current message context" and thus badly log.
7 *@ TODO This is not final yet, v15 will bring "filters".
9 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
10 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSD's mailx(1): */
13 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
15 * Copyright (c) 2006 The NetBSD Foundation, Inc.
16 * All rights reserved.
18 * This code is derived from software contributed to The NetBSD Foundation
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions
24 * 1. Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * 2. Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
30 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
31 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
34 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
43 #define n_FILE mime_enc
45 #ifndef HAVE_AMALGAMATION
51 a_ME_Q
= 1, /* Must quote */
53 a_ME_XF
= 3, /* Special character 'F' - maybe quoted */
54 a_ME_XD
= 4, /* Special character '.' - maybe quoted */
55 a_ME_UU
= 5, /* In header, _ must be quoted in encoded word */
56 a_ME_US
= '_', /* In header, ' ' must be quoted as _ in encoded word */
57 a_ME_QM
= '?', /* In header, special character ? not always quoted */
58 a_ME_EQ
= '=', /* In header, '=' must be quoted in encoded word */
59 a_ME_HT
='\t', /* Body HT=SP. Head HT=HT, BUT quote in encoded word */
60 a_ME_NL
= 0, /* Don't quote '\n' (NL) */
61 a_ME_CR
= a_ME_Q
/* Always quote a '\r' (CR) */
64 /* Lookup tables to decide whether a character must be encoded or not.
65 * Email header differences according to RFC 2047, section 4.2:
66 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
67 * - don't care about the special ^F[rom] and ^.$ */
68 static ui8_t
const a_me_qp_body
[] = {
69 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
70 a_ME_Q
, a_ME_SP
, a_ME_NL
, a_ME_Q
, a_ME_Q
, a_ME_CR
, a_ME_Q
, a_ME_Q
,
71 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
72 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
73 a_ME_SP
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
74 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_XD
, a_ME_N
,
75 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
76 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_Q
, a_ME_N
, a_ME_N
,
78 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_XF
, a_ME_N
,
79 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
80 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
81 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
82 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
83 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
84 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
85 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_Q
,
87 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
88 a_ME_Q
, a_ME_HT
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
89 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
90 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
91 a_ME_US
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
92 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
93 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
94 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_EQ
, a_ME_N
, a_ME_QM
,
96 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
97 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
98 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
99 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_UU
,
100 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
101 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
102 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
103 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_Q
,
106 /* The decoding table is only accessed via a_ME_B64_DECUI8() */
107 static char const a_me_b64_enctbl
[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
108 "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";
109 static signed char const a_me_b64__dectbl
[] = {
110 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
111 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
112 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
113 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
114 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
115 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
116 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
117 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
119 #define a_ME_B64_EQU (ui32_t)-2
120 #define a_ME_B64_BAD (ui32_t)-1
121 #define a_ME_B64_DECUI8(C) \
122 ((ui8_t)(C) >= sizeof(a_me_b64__dectbl)\
123 ? a_ME_B64_BAD : (ui32_t)a_me_b64__dectbl[(ui8_t)(C)])
125 /* (Ugly to place an enum here) */
126 static char const a_me_ctes
[] = "7bit\0" "8bit\0" \
127 "base64\0" "quoted-printable\0" "binary\0" \
128 /* abbrevs */ "8b\0" "b64\0" "qp\0";
130 a_ME_CTES_7B_OFF
= 0, a_ME_CTES_7B_LEN
= 4,
131 a_ME_CTES_8B_OFF
= 5, a_ME_CTES_8B_LEN
= 4,
132 a_ME_CTES_B64_OFF
= 10, a_ME_CTES_B64_LEN
= 6,
133 a_ME_CTES_QP_OFF
= 17, a_ME_CTES_QP_LEN
= 16,
134 a_ME_CTES_BIN_OFF
= 34, a_ME_CTES_BIN_LEN
= 6,
136 a_ME_CTES_S8B_OFF
= 41, a_ME_CTES_S8B_LEN
= 2,
137 a_ME_CTES_SB64_OFF
= 44, a_ME_CTES_SB64_LEN
= 3,
138 a_ME_CTES_SQP_OFF
= 48, a_ME_CTES_SQP_LEN
= 2
141 /* Check whether *s must be quoted according to flags, else body rules;
142 * sol indicates whether we are at the first character of a line/field */
143 SINLINE
enum a_me_qact
a_me_mustquote(char const *s
, char const *e
, bool_t sol
,
144 enum mime_enc_flags flags
);
146 /* Trim WS and make work point to the decodable range of in.
147 * Return the amount of bytes a b64_decode operation on that buffer requires,
148 * or UIZ_MAX on overflow error */
149 static size_t a_me_b64_decode_prepare(struct str
*work
, struct str
const *in
);
151 /* Perform b64_decode on in(put) to sufficiently spaced out(put).
152 * Return number of useful bytes in out or -1 on error.
153 * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
154 static ssize_t
a_me_b64_decode(struct str
*out
, struct str
*in
);
156 SINLINE
enum a_me_qact
157 a_me_mustquote(char const *s
, char const *e
, bool_t sol
,
158 enum mime_enc_flags flags
){
163 qtab
= (flags
& (MIMEEF_ISHEAD
| MIMEEF_ISENCWORD
))
164 ? a_me_qp_head
: a_me_qp_body
;
166 if((ui8_t
)*s
> 0x7F){
173 if((r
= a
) == a_ME_N
|| a
== a_ME_Q
)
178 /* Special header fields */
179 if(flags
& (MIMEEF_ISHEAD
| MIMEEF_ISENCWORD
)){
180 /* Special massage for encoded words */
181 if(flags
& MIMEEF_ISENCWORD
){
195 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
196 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
197 * should be hard to match) */
198 if(a
== a_ME_QM
&& ((!sol
&& s
[-1] == '=') || (s
< e
&& s
[1] == '=')))
206 /* WS only if trailing white space */
207 if(&s
[1] == e
|| s
[1] == '\n')
212 /* Rest are special begin-of-line cases */
218 if(&s
[4] < e
&& s
[1] == 'r' && s
[2] == 'o' && s
[3] == 'm' && s
[4] == ' ')
223 if(a
== a_ME_XD
&& (&s
[1] == e
|| s
[1] == '\n'))
233 a_me_b64_decode_prepare(struct str
*work
, struct str
const *in
){
238 cp_len
= n_str_trim(work
)->l
;
242 if(UIZ_MAX
/ 3 <= cp_len
){
246 cp_len
= ((cp_len
* 3) >> 2) + (cp_len
>> 3);
248 cp_len
+= (2 * 3) +1;
255 a_me_b64_decode(struct str
*out
, struct str
*in
){
257 ui8_t
const *q
, *end
;
262 p
= (ui8_t
*)&out
->s
[out
->l
];
263 q
= (ui8_t
const*)in
->s
;
265 for(end
= &q
[in
->l
]; PTR2SIZE(end
- q
) >= 4; q
+= 4){
268 a
= a_ME_B64_DECUI8(q
[0]);
269 b
= a_ME_B64_DECUI8(q
[1]);
270 c
= a_ME_B64_DECUI8(q
[2]);
271 d
= a_ME_B64_DECUI8(q
[3]);
273 if(n_UNLIKELY(a
>= a_ME_B64_EQU
|| b
>= a_ME_B64_EQU
||
274 c
== a_ME_B64_BAD
|| d
== a_ME_B64_BAD
))
277 pb
= ((a
<< 2) | ((b
& 0x30) >> 4));
278 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
281 if(c
== a_ME_B64_EQU
){ /* got '=' */
283 if(n_UNLIKELY(d
!= a_ME_B64_EQU
))
288 pb
= (((b
& 0x0F) << 4) | ((c
& 0x3C) >> 2));
289 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
292 if(d
== a_ME_B64_EQU
) /* got '=' */
294 pb
= (((c
& 0x03) << 6) | d
);
295 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
303 i
= PTR2SIZE((char*)p
- out
->s
);
308 in
->l
-= PTR2SIZE(q
- (ui8_t
*)in
->s
);
309 in
->s
= n_UNCONST(q
);
315 mime_enc_target(void){
316 char const *cp
, *v15
;
320 if((v15
= ok_vlook(encoding
)) != NULL
)
321 n_OBSOLETE(_("please use *mime-encoding* instead of *encoding*"));
323 if((cp
= ok_vlook(mime_encoding
)) == NULL
&& (cp
= v15
) == NULL
)
324 rv
= MIME_DEFAULT_ENCODING
;
325 else if(!asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_S8B_OFF
]) ||
326 !asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_8B_OFF
]))
328 else if(!asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_SB64_OFF
]) ||
329 !asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_B64_OFF
]))
331 else if(!asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_SQP_OFF
]) ||
332 !asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_QP_OFF
]))
335 n_err(_("Warning: invalid *mime-encoding*, using Base64: %s\n"), cp
);
343 mime_enc_from_ctehead(char const *hbody
){
355 } const *cte
, cte_base
[] = {
356 {a_ME_CTES_7B_OFF
, a_ME_CTES_7B_LEN
, MIMEE_7B
, 0},
357 {a_ME_CTES_8B_OFF
, a_ME_CTES_8B_LEN
, MIMEE_8B
, 0},
358 {a_ME_CTES_B64_OFF
, a_ME_CTES_B64_LEN
, MIMEE_B64
, 0},
359 {a_ME_CTES_QP_OFF
, a_ME_CTES_QP_LEN
, MIMEE_QP
, 0},
360 {a_ME_CTES_BIN_OFF
, a_ME_CTES_BIN_LEN
, MIMEE_BIN
, 0},
361 {0, 0, MIMEE_NONE
, 0}
363 union {char const *s
; size_t l
;} u
;
366 for(u
.s
= ++hbody
; *u
.s
!= '\0' && *u
.s
!= '"'; ++u
.s
)
369 for(u
.s
= hbody
; *u
.s
!= '\0' && !whitechar(*u
.s
); ++u
.s
)
371 u
.l
= PTR2SIZE(u
.s
- hbody
);
373 for(cte
= cte_base
;;)
374 if(cte
->len
== u
.l
&& !asccasecmp(&a_me_ctes
[cte
->off
], hbody
)){
377 }else if((++cte
)->enc
== MIMEE_NONE
){
387 mime_enc_from_conversion(enum conversion
const convert
){
392 case CONV_7BIT
: rv
= &a_me_ctes
[a_ME_CTES_7B_OFF
]; break;
393 case CONV_8BIT
: rv
= &a_me_ctes
[a_ME_CTES_8B_OFF
]; break;
394 case CONV_TOQP
: rv
= &a_me_ctes
[a_ME_CTES_QP_OFF
]; break;
395 case CONV_TOB64
: rv
= &a_me_ctes
[a_ME_CTES_B64_OFF
]; break;
396 case CONV_NONE
: rv
= &a_me_ctes
[a_ME_CTES_BIN_OFF
]; break;
397 default: rv
= n_empty
; break;
404 mime_enc_mustquote(char const *ln
, size_t lnlen
, enum mime_enc_flags flags
){
409 for(rv
= 0, sol
= TRU1
; lnlen
> 0; sol
= FAL0
, ++ln
, --lnlen
)
410 switch(a_me_mustquote(ln
, ln
+ lnlen
, sol
, flags
)){
414 assert(flags
& MIMEEF_ISENCWORD
);
426 qp_encode_calc_size(size_t len
){
430 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
431 * However, we must be aware that (a) the output may span multiple lines
432 * and (b) the input does not end with a newline itself (nonetheless):
433 * LC_ALL=C awk 'BEGIN{
434 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
436 * s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
438 /* Several n_ERR_OVERFLOW */
439 if(len
>= UIZ_MAX
/ 3){
444 lines
= bytes
/ QP_LINESIZE
;
447 if(len
>= UIZ_MAX
/ 3){
451 /* Trailing hard NL may be missing, so there may be two lines.
452 * Thus add soft + hard NL per line and a trailing NUL */
454 lines
= (bytes
/ QP_LINESIZE
) + 1;
457 /*if(UIZ_MAX - bytes >= lines){
470 qp_encode_cp(struct str
*out
, char const *cp
, enum qpflags flags
){
474 in
.s
= n_UNCONST(cp
);
476 out
= qp_encode(out
, &in
, flags
);
482 qp_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
487 in
.s
= n_UNCONST(vp
);
489 out
= qp_encode(out
, &in
, flags
);
496 qp_encode(struct str
*out
, struct str
const *in
, enum qpflags flags
){
503 sol
= (flags
& QP_ISHEAD
? FAL0
: TRU1
);
505 if(!(flags
& QP_BUF
)){
506 if((lnlen
= qp_encode_calc_size(in
->l
)) == UIZ_MAX
){
510 out
->s
= (flags
& QP_SALLOC
) ? salloc(lnlen
) : srealloc(out
->s
, lnlen
);
516 if(flags
& QP_ISHEAD
){
517 enum mime_enc_flags ef
;
519 ef
= MIMEEF_ISHEAD
| (flags
& QP_ISENCWORD
? MIMEEF_ISENCWORD
: 0);
521 for(seenx
= FAL0
, sol
= TRU1
; is
< ie
; sol
= FAL0
, ++qp
){
525 mq
= a_me_mustquote(is
, ie
, sol
, ef
);
529 /* We convert into a single *encoded-word*, that'll end up in
530 * =?C?Q??=; quote '?' from when we're inside there on */
531 if(seenx
&& c
== '?')
534 }else if(mq
== a_ME_US
)
540 qp
= n_c_to_hex_base16(qp
, c
) + 1;
546 /* The body needs to take care of soft line breaks etc. */
547 for(lnlen
= 0, seenx
= FAL0
; is
< ie
; sol
= FAL0
){
551 mq
= a_me_mustquote(is
, ie
, sol
, MIMEEF_NONE
);
554 if(mq
== a_ME_N
&& (c
!= '\n' || !seenx
)){
556 if(++lnlen
< QP_LINESIZE
- 1)
558 /* Don't write a soft line break when we're in the last possible
559 * column and either an LF has been written or only an LF follows, as
560 * that'll end the line anyway */
561 /* XXX but - ensure is+1>=ie, then??
562 * xxx and/or - what about resetting lnlen; that contradicts
563 * xxx input==1 input line assertion, though */
564 if(c
== '\n' || is
== ie
|| is
[0] == '\n' || is
[1] == '\n')
574 if(lnlen
> QP_LINESIZE
- 3 - 1){
581 qp
= n_c_to_hex_base16(qp
, c
);
584 if(c
!= '\n' || !seenx
)
592 /* Enforce soft line break if we haven't seen LF */
593 if(in
->l
> 0 && *--is
!= '\n'){
599 out
->l
= PTR2SIZE(qp
- out
->s
);
600 out
->s
[out
->l
] = '\0';
607 qp_decode_header(struct str
*out
, struct str
const *in
){
613 if(UIZ_MAX
-1 - out
->l
<= in
->l
||
614 SI32_MAX
<= out
->l
+ in
->l
){ /* XXX wrong, we may replace */
621 n_string_reserve(n_string_take_ownership(&s
, out
->s
,
622 (out
->l
== 0 ? 0 : out
->l
+1), out
->l
),
623 in
->l
+ (in
->l
>> 2));
625 for(is
= in
->s
, ie
= &is
[in
->l
- 1]; is
<= ie
;){
631 goto jpushc
; /* TODO According to RFC 2045, 6.7,
632 * ++is; TODO we should warn the user, but have no context
633 * goto jehead; TODO to do so; can't over and over */
634 }else if((c
= n_c_from_hex_base16(is
)) >= 0){
638 /* Invalid according to RFC 2045, section 6.7 */
639 /* TODO Follow RFC 2045, 6.7 advice and simply put through */
643 * TODO if(n_psonce & n_PSO_UNICODE)
644 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
652 if(c
== '_' /* a_ME_US */)
654 n_string_push_c(&s
, (char)c
);
658 out
->s
= n_string_cp(&s
);
660 n_string_gut(n_string_drop_ownership(&s
));
663 return (out
!= NULL
);
667 qp_decode_part(struct str
*out
, struct str
const *in
, struct str
*outrest
,
668 struct str
*inrest_or_null
){
669 struct n_string s
, *sp
;
676 outrest
->s
= n_UNCONST(is
);
681 if(UIZ_MAX
-1 - out
->l
<= in
->l
||
682 SI32_MAX
<= out
->l
+ in
->l
) /* XXX wrong, we may replace */
685 sp
= n_string_creat(&s
);
686 sp
= n_string_take_ownership(sp
, out
->s
,
687 (out
->l
== 0 ? 0 : out
->l
+1), out
->l
);
688 sp
= n_string_reserve(sp
, in
->l
+ (in
->l
>> 2));
690 for(is
= in
->s
, ie
= &is
[in
->l
- 1]; is
<= ie
;){
693 if((c
= *is
++) != '='){
695 n_string_push_c(sp
, (char)c
);
700 * Therefore, when decoding a Quoted-Printable body, any
701 * trailing white space on a line must be deleted, as it will
702 * necessarily have been added by intermediate transport
704 for(; is
<= ie
&& blankchar(*is
); ++is
)
707 /* Soft line break? */
710 goto jpushc
; /* TODO According to RFC 2045, 6.7,
711 * ++is; TODO we should warn the user, but have no context
712 * goto jebody; TODO to do so; can't over and over */
715 /* Not a soft line break? */
717 if((c
= n_c_from_hex_base16(is
)) >= 0){
721 /* Invalid according to RFC 2045, section 6.7 */
722 /* TODO Follow RFC 2045, 6.7 advice and simply put through */
726 * TODO if(n_psonce & n_PSO_UNICODE)
727 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
734 /* CRLF line endings are encoded as QP, followed by a soft line break, so
735 * check for this special case, and simply forget we have seen one, so as
736 * not to end up with the entire DOS file in a contiguous buffer */
738 if(sp
->s_len
> 0 && sp
->s_dat
[sp
->s_len
- 1] == '\n'){
739 #if 0 /* TODO qp_decode_part() we do not normalize CRLF
740 * TODO to LF because for that we would need
741 * TODO to know if we are about to write to
742 * TODO the display or do save the file!
743 * TODO 'hope the MIME/send layer rewrite will
744 * TODO offer the possibility to DTRT */
745 if(sp
->s_len
> 1 && sp
->s_dat
[sp
->s_len
- 2] == '\r')
746 n_string_push_c(n_string_trunc(sp
, sp
->s_len
- 2), '\n');
755 if((l
= PTR2SIZE(ie
- is
)) > 0){
756 if(inrest_or_null
== NULL
)
758 n_str_assign_buf(inrest_or_null
, is
, l
);
761 outrest
->s
= n_string_cp(sp
);
762 outrest
->l
= s
.s_len
;
763 n_string_drop_ownership(sp
);
770 out
->s
= n_string_cp(sp
);
772 n_string_gut(n_string_drop_ownership(sp
));
775 return (out
!= NULL
);
783 b64_encode_calc_size(size_t len
){
785 if(len
>= UIZ_MAX
/ 4)
789 len
+= (((len
/ B64_ENCODE_INPUT_PER_LINE
) + 1) * 3);
790 len
+= 2 + 1; /* CRLF, \0 */
797 b64_encode(struct str
*out
, struct str
const *in
, enum b64flags flags
){
803 assert(!(flags
& B64_NOPAD
) ||
804 !(flags
& (B64_CRLF
| B64_LF
| B64_MULTILINE
)));
806 p
= (ui8_t
const*)in
->s
;
808 if(!(flags
& B64_BUF
)){
809 if((i
= b64_encode_calc_size(in
->l
)) == UIZ_MAX
){
813 out
->s
= (flags
& B64_SALLOC
) ? salloc(i
) : srealloc(out
->s
, i
);
817 if(!(flags
& (B64_CRLF
| B64_LF
)))
818 flags
&= ~B64_MULTILINE
;
820 for(lnlen
= 0, i
= in
->l
; (ssize_t
)i
> 0; p
+= 3, i
-= 3){
824 b64
[0] = a_me_b64_enctbl
[a
>> 2];
828 b64
[1] = a_me_b64_enctbl
[((a
& 0x3) << 4)];
834 b64
[1] = a_me_b64_enctbl
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
835 b64
[2] = a_me_b64_enctbl
[((b
& 0x0F) << 2)];
841 b64
[1] = a_me_b64_enctbl
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
842 b64
[2] = a_me_b64_enctbl
[((b
& 0x0F) << 2) | ((c
& 0xC0u
) >> 6)];
843 b64
[3] = a_me_b64_enctbl
[c
& 0x3F];
848 if(!(flags
& B64_MULTILINE
))
851 if(lnlen
< B64_LINESIZE
)
857 if(flags
& (B64_CRLF
| B64_LF
))
861 if((flags
& (B64_CRLF
| B64_LF
)) &&
862 (!(flags
& B64_MULTILINE
) || lnlen
!= 0)){
865 if(flags
& (B64_CRLF
| B64_LF
))
867 }else if(flags
& B64_NOPAD
)
868 while(b64
!= out
->s
&& b64
[-1] == '=')
871 out
->l
= PTR2SIZE(b64
- out
->s
);
872 out
->s
[out
->l
] = '\0';
874 /* Base64 includes + and /, replace them with _ and -.
875 * This is base64url according to RFC 4648, then. Since we only support
876 * that for encoding and it is only used for boundary strings, this is
877 * yet a primitive implementation; xxx use tables; support decoding */
878 if(flags
& B64_RFC4648URL
){
881 for(b64
= out
->s
; (c
= *b64
) != '\0'; ++b64
)
893 b64_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
894 enum b64flags flags
){
898 in
.s
= n_UNCONST(vp
);
900 out
= b64_encode(out
, &in
, flags
);
907 b64_encode_cp(struct str
*out
, char const *cp
, enum b64flags flags
){
911 in
.s
= n_UNCONST(cp
);
913 out
= b64_encode(out
, &in
, flags
);
920 b64_decode(struct str
*out
, struct str
const *in
){
927 if((len
= a_me_b64_decode_prepare(&work
, in
)) == UIZ_MAX
)
930 /* Ignore an empty input, as may happen for an empty final line */
932 out
->s
= srealloc(out
->s
, 1);
933 else if(work
.l
>= 4 && !(work
.l
& 3)){
934 out
->s
= srealloc(out
->s
, len
+1);
935 if((ssize_t
)(len
= a_me_b64_decode(out
, &work
)) < 0)
939 out
->s
[out
->l
] = '\0';
942 return (out
!= NULL
);
949 b64_decode_header(struct str
*out
, struct str
const *in
){
950 struct str outr
, inr
;
953 if(!b64_decode(out
, in
)){
954 memset(&outr
, 0, sizeof outr
);
955 memset(&inr
, 0, sizeof inr
);
957 if(!b64_decode_part(out
, in
, &outr
, &inr
) || outr
.l
> 0 || inr
.l
> 0)
966 return (out
!= NULL
);
970 b64_decode_part(struct str
*out
, struct str
const *in
, struct str
*outrest
,
971 struct str
*inrest_or_null
){
972 struct str work
, save
;
973 ui32_t a
, b
, c
, b64l
;
975 struct n_string s
, workbuf
;
980 if((len
= out
->l
) > 0 && out
->s
[len
] == '\0')
981 n_string_take_ownership(&s
, out
->s
, len
+1, len
);
984 n_string_push_buf(&s
, out
->s
, len
);
988 out
->s
= NULL
, out
->l
= 0;
989 n_string_creat(&workbuf
);
991 if((len
= a_me_b64_decode_prepare(&work
, in
)) == UIZ_MAX
)
995 n_string_push_buf(&s
, outrest
->s
, outrest
->l
);
1000 if(UIZ_MAX
- len
<= s
.s_len
||
1001 SI32_MAX
<= len
+ s
.s_len
) /* XXX wrong, we may replace */
1007 /* This text decoder is extremely expensive, especially given that in all
1008 * but _invalid_ cases it is not even needed! So try once to do the normal
1009 * decoding, if that fails, go the hard way */
1011 out
->s
= n_string_resize(&s
, len
+ (out
->l
= b64l
= s
.s_len
))->s_dat
;
1013 if(work
.l
>= 4 && a_me_b64_decode(out
, &work
) >= 0){
1014 n_string_trunc(&s
, out
->l
);
1019 n_string_trunc(&s
, b64l
);
1021 out
->s
= NULL
, out
->l
= 0;
1023 /* TODO b64_decode_part() does not yet STOP if it sees padding, whereas
1024 * TODO OpenSSL and mutt simply bail on such stuff */
1031 x
= a_ME_B64_DECUI8((ui8_t
)(cx
= *work
.s
));
1034 if(x
>= a_ME_B64_EQU
)
1041 if(x
>= a_ME_B64_EQU
)
1048 if(x
== a_ME_B64_BAD
)
1055 if(x
== a_ME_B64_BAD
){
1057 /* TODO This would be wrong since iconv(3) may be applied first! */
1059 if(n_psonce
& n_PSO_UNICODE
)
1060 n_string_push_buf(&s
, n_unirepl
, sizeof(n_unirepl
) -1);
1062 n_string_push_c(&s
, '?');
1065 }else if(c
== a_ME_B64_EQU
&& x
!= a_ME_B64_EQU
){
1066 /* This is not only invalid but bogus. Skip it over! */
1067 /* TODO This would be wrong since iconv(3) may be applied first! */
1069 n_string_push_buf(&s
, n_UNIREPL n_UNIREPL n_UNIREPL n_UNIREPL
,
1070 (sizeof(n_UNIREPL
) -1) * 4);
1076 pb
= ((a
<< 2) | ((b
& 0x30) >> 4));
1077 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
1078 n_string_push_c(&s
, (char)pb
);
1079 pb
= (((b
& 0x0F) << 4) | ((c
& 0x3C) >> 2));
1080 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
1081 n_string_push_c(&s
, (char)pb
);
1082 if(x
!= a_ME_B64_EQU
){
1083 pb
= (((c
& 0x03) << 6) | x
);
1084 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
1085 n_string_push_c(&s
, (char)pb
);
1094 if(b64l
> 0 && b64l
!= 4){
1095 if(inrest_or_null
== NULL
)
1097 inrest_or_null
->s
= srealloc(inrest_or_null
->s
, b64l
+1);
1098 inrest_or_null
->s
[0] = ca
;
1100 inrest_or_null
->s
[1] = cb
;
1102 inrest_or_null
->s
[2] = cc
;
1103 inrest_or_null
->s
[inrest_or_null
->l
= b64l
] = '\0';
1112 out
->s
= n_string_cp(&s
);
1114 n_string_drop_ownership(&s
);
1116 n_string_gut(&workbuf
);
1119 return (out
!= NULL
);