1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047;
3 *@ for _header() versions: including "encoded word" as of RFC 2049):
4 *@ - Quoted-Printable, section 6.7
5 *@ - Base64, section 6.8
6 *@ QP quoting and _b64_decode(), b64_encode() inspired from NetBSDs mailx(1):
7 *@ $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $
8 *@ TODO We have no notion of a "current message context" and thus badly log.
9 *@ TODO This is not final yet, v15 will bring "filters".
11 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12 * SPDX-License-Identifier: ISC
14 * Permission to use, copy, modify, and/or distribute this software for any
15 * purpose with or without fee is hereby granted, provided that the above
16 * copyright notice and this permission notice appear in all copies.
18 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
19 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
20 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
21 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
22 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
24 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
27 #define n_FILE mime_enc
29 #ifndef HAVE_AMALGAMATION
/* Quoting classes stored in the QP lookup tables and compared against in
 * a_me_mustquote().  Several members reuse the byte they describe as their
 * value ('_', '?', '=', '\t'); a_ME_NL is 0 and a_ME_CR aliases a_ME_Q.
 * NOTE(review): the enum header and the a_ME_N / a_ME_SP members used by the
 * tables are not among the visible lines (nothing before embedded number 35,
 * and 36 is absent) -- confirm their values against the full file. */
35 a_ME_Q
= 1, /* Must quote */
37 a_ME_XF
= 3, /* Special character 'F' - maybe quoted */
38 a_ME_XD
= 4, /* Special character '.' - maybe quoted */
39 a_ME_UU
= 5, /* In header, _ must be quoted in encoded word */
40 a_ME_US
= '_', /* In header, ' ' must be quoted as _ in encoded word */
41 a_ME_QM
= '?', /* In header, special character ? not always quoted */
42 a_ME_EQ
= '=', /* In header, '=' must be quoted in encoded word */
43 a_ME_HT
='\t', /* Body HT=SP. Head HT=HT, BUT quote in encoded word */
44 a_ME_NL
= 0, /* Don't quote '\n' (NL) */
45 a_ME_CR
= a_ME_Q
/* Always quote a '\r' (CR) */
48 /* Lookup tables to decide whether a character must be encoded or not.
49 * Email header differences according to RFC 2047, section 4.2:
50 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
51 * - don't care about the special ^F[rom] and ^.$ */
/* Body-rule table: a_me_mustquote() selects it when neither MIMEEF_ISHEAD nor
 * MIMEEF_ISENCWORD is set.  The visible initializer has 128 entries, eight
 * per row, i.e. one per 7-bit byte value: HT (9) and SP (32) are a_ME_SP,
 * LF (10) is a_ME_NL, CR (13) is a_ME_CR, all other control bytes and DEL
 * (127) are a_ME_Q, '.' (46) is a_ME_XD, '=' (61) is a_ME_Q and 'F' (70) is
 * a_ME_XF; everything else is a_ME_N.
 * NOTE(review): the closing "};" is not among the visible lines. */
52 static ui8_t
const a_me_qp_body
[] = {
53 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
54 a_ME_Q
, a_ME_SP
, a_ME_NL
, a_ME_Q
, a_ME_Q
, a_ME_CR
, a_ME_Q
, a_ME_Q
,
55 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
56 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
57 a_ME_SP
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
58 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_XD
, a_ME_N
,
59 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
60 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_Q
, a_ME_N
, a_ME_N
,
62 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_XF
, a_ME_N
,
63 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
64 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
65 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
66 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
67 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
68 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
69 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_Q
,
/* Rows of the header-rule table (a_me_mustquote() uses a_me_qp_head when
 * MIMEEF_ISHEAD or MIMEEF_ISENCWORD is set).  Again 128 entries, eight per
 * row.  Differences to the body table that are visible here: HT (9) is
 * a_ME_HT, LF and CR are plain a_ME_Q, SP (32) is a_ME_US, '.' and 'F' are
 * a_ME_N, '=' (61) is a_ME_EQ, '?' (63) is a_ME_QM and '_' (95) is a_ME_UU.
 * NOTE(review): the declaration line that opens this initializer and the
 * closing "};" are not among the visible lines -- presumably this is
 * a_me_qp_head; confirm. */
71 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
72 a_ME_Q
, a_ME_HT
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
73 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
74 a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
, a_ME_Q
,
75 a_ME_US
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
76 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
77 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
78 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_EQ
, a_ME_N
, a_ME_QM
,
80 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
81 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
82 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
83 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_UU
,
84 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
85 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
86 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
,
87 a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_N
, a_ME_Q
,
90 /* The decoding table is only accessed via a_ME_B64_DECUI8() */
/* Base64 alphabet in value order: 'A'-'Z' (0-25), 'a'-'z' (26-51), '0'-'9'
 * (52-61), '+' (62) and '/' (63); b64_encode() indexes it with 6-bit values */
91 static char const a_me_b64_enctbl
[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
92 "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";
/* Reverse mapping for the first 128 byte values: the 6-bit value of an
 * alphabet character, -2 for the padding character '=' (index 61) and -1 for
 * everything else.
 * NOTE(review): the closing "};" is not among the visible lines. */
93 static signed char const a_me_b64__dectbl
[] = {
94 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
95 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
96 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
97 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
98 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
99 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
100 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
101 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
/* Sentinels as seen by users of a_ME_B64_DECUI8(): the table's -2 / -1 cast
 * to ui32_t.  The decoders test "x >= a_ME_B64_EQU" to catch both at once,
 * which presumes ui32_t is unsigned so that both exceed any 6-bit value --
 * confirm.  a_ME_B64_DECUI8() maps a byte that lies outside of the table to
 * a_ME_B64_BAD and otherwise yields the table entry converted to ui32_t; note
 * that its argument is evaluated twice. */
103 #define a_ME_B64_EQU (ui32_t)-2
104 #define a_ME_B64_BAD (ui32_t)-1
105 #define a_ME_B64_DECUI8(C) \
106 ((ui8_t)(C) >= sizeof(a_me_b64__dectbl)\
107 ? a_ME_B64_BAD : (ui32_t)a_me_b64__dectbl[(ui8_t)(C)])
109 /* (Ugly to place an enum here) */
/* Pool of NUL-separated Content-Transfer-Encoding names; the a_ME_CTES_*_OFF
 * and a_ME_CTES_*_LEN constants give start offset and length of each name */
110 static char const a_me_ctes
[] = "7bit\0" "8bit\0" \
111 "base64\0" "quoted-printable\0" "binary\0" \
112 /* abbrevs */ "8b\0" "b64\0" "qp\0";
/* Offsets into and lengths (without the NUL) of the names in a_me_ctes: the
 * five full names first, then the abbreviations "8b", "b64" and "qp".
 * NOTE(review): the enclosing enum header and its closing brace are not
 * among the visible lines. */
114 a_ME_CTES_7B_OFF
= 0, a_ME_CTES_7B_LEN
= 4,
115 a_ME_CTES_8B_OFF
= 5, a_ME_CTES_8B_LEN
= 4,
116 a_ME_CTES_B64_OFF
= 10, a_ME_CTES_B64_LEN
= 6,
117 a_ME_CTES_QP_OFF
= 17, a_ME_CTES_QP_LEN
= 16,
118 a_ME_CTES_BIN_OFF
= 34, a_ME_CTES_BIN_LEN
= 6,
120 a_ME_CTES_S8B_OFF
= 41, a_ME_CTES_S8B_LEN
= 2,
121 a_ME_CTES_SB64_OFF
= 44, a_ME_CTES_SB64_LEN
= 3,
122 a_ME_CTES_SQP_OFF
= 48, a_ME_CTES_SQP_LEN
= 2
125 /* Check whether *s must be quoted according to flags, else body rules;
126 * sol indicates whether we are at the first character of a line/field */
/* e is one past the last input byte: mime_enc_mustquote() passes ln + lnlen
 * and the look-ahead tests compare &s[1] against e */
127 n_INLINE
enum a_me_qact
a_me_mustquote(char const *s
, char const *e
,
128 bool_t sol
, enum mime_enc_flags flags
);
130 /* Trim WS and make work point to the decodable range of in.
131 * Return the amount of bytes a b64_decode operation on that buffer requires,
132 * or UIZ_MAX on overflow error */
133 static size_t a_me_b64_decode_prepare(struct str
*work
, struct str
const *in
);
135 /* Perform b64_decode on in(put) to sufficiently spaced out(put).
136 * Return number of useful bytes in out or -1 on error.
137 * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
/* Writing starts at out->s[out->l]; in->s / in->l are advanced past the
 * input that was consumed */
138 static ssize_t
a_me_b64_decode(struct str
*out
, struct str
*in
);
/* Decide how the byte at s is to be treated by the QP encoder.
 * s: current byte; e: one past the end of the input; sol: whether s is the
 * first byte of a line/field; flags: select header versus body rules.
 * Visible logic: the header table is used if MIMEEF_ISHEAD or
 * MIMEEF_ISENCWORD is set, the body table otherwise; bytes above 0x7F take
 * a separate branch; a table class of a_ME_N or a_ME_Q is tested for first.
 * In headers '?' is only special next to a '='.  In bodies white space counts
 * only when it ends the input or precedes LF; the remaining body tests are
 * begin-of-line cases: a look-ahead for "rom " and, for a_ME_XD, that the
 * input ends or LF follows right after it.
 * NOTE(review): only fragments of the body are visible (embedded numbers
 * jump, e.g. 150 -> 157); the statements each test guards cannot be seen.
 * NOTE(review): the '?' test reads s[1] under "s < e", which also holds for
 * the last byte, whereas the other look-aheads compare &s[1] against e --
 * looks like a one-byte over-read unless the input is NUL terminated;
 * confirm. */
140 n_INLINE
enum a_me_qact
141 a_me_mustquote(char const *s
, char const *e
, bool_t sol
,
142 enum mime_enc_flags flags
){
147 qtab
= (flags
& (MIMEEF_ISHEAD
| MIMEEF_ISENCWORD
))
148 ? a_me_qp_head
: a_me_qp_body
;
150 if((ui8_t
)*s
> 0x7F){
157 if((r
= a
) == a_ME_N
|| a
== a_ME_Q
)
162 /* Special header fields */
163 if(flags
& (MIMEEF_ISHEAD
| MIMEEF_ISENCWORD
)){
164 /* Special massage for encoded words */
165 if(flags
& MIMEEF_ISENCWORD
){
179 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
180 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
181 * should be hard to match */
182 if(a
== a_ME_QM
&& ((!sol
&& s
[-1] == '=') || (s
< e
&& s
[1] == '=')))
190 /* WS only if trailing white space */
191 if(&s
[1] == e
|| s
[1] == '\n')
196 /* Rest are special begin-of-line cases */
202 if(&s
[4] < e
&& s
[1] == 'r' && s
[2] == 'o' && s
[3] == 'm' && s
[4] == ' ')
207 if(a
== a_ME_XD
&& (&s
[1] == e
|| s
[1] == '\n'))
/* Trim work on both ends via n_str_trim() and estimate the decode buffer
 * size from the trimmed length: lengths of UIZ_MAX / 3 or more take the
 * overflow branch, otherwise cp_len becomes 3/4 of the length plus 1/8 of it
 * plus 7 bytes of slack.
 * NOTE(review): how work gets initialised from in is not visible (embedded
 * numbers jump 217 -> 222); presumably *work = *in -- confirm. */
217 a_me_b64_decode_prepare(struct str
*work
, struct str
const *in
){
222 cp_len
= n_str_trim(work
, n_STR_TRIM_BOTH
)->l
;
226 if(UIZ_MAX
/ 3 <= cp_len
){
230 cp_len
= ((cp_len
* 3) >> 2) + (cp_len
>> 3);
232 cp_len
+= (2 * 3) +1;
/* Decode complete Base64 quadruples from in, writing at out->s[out->l].
 * Visible logic: each group of four input bytes is mapped through
 * a_ME_B64_DECUI8(); a group is rejected if one of the first two is padding
 * or invalid, or if one of the last two is invalid.  Up to three bytes are
 * assembled per group; '=' in third position requires '=' in fourth.  Each
 * assembled byte is tested against '\r' and n_PS_BASE64_STRIP_CR before
 * (presumably) being stored.  Afterwards in->s / in->l are advanced past the
 * consumed input so that the caller can see the undecoded rest.
 * NOTE(review): stores, error exits and the return statement are not among
 * the visible lines. */
239 a_me_b64_decode(struct str
*out
, struct str
*in
){
241 ui8_t
const *q
, *end
;
246 p
= (ui8_t
*)&out
->s
[out
->l
];
247 q
= (ui8_t
const*)in
->s
;
249 for(end
= &q
[in
->l
]; PTR2SIZE(end
- q
) >= 4; q
+= 4){
252 a
= a_ME_B64_DECUI8(q
[0]);
253 b
= a_ME_B64_DECUI8(q
[1]);
254 c
= a_ME_B64_DECUI8(q
[2]);
255 d
= a_ME_B64_DECUI8(q
[3]);
257 if(n_UNLIKELY(a
>= a_ME_B64_EQU
|| b
>= a_ME_B64_EQU
||
258 c
== a_ME_B64_BAD
|| d
== a_ME_B64_BAD
))
261 pb
= ((a
<< 2) | ((b
& 0x30) >> 4));
262 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
265 if(c
== a_ME_B64_EQU
){ /* got '=' */
267 if(n_UNLIKELY(d
!= a_ME_B64_EQU
))
272 pb
= (((b
& 0x0F) << 4) | ((c
& 0x3C) >> 2));
273 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
276 if(d
== a_ME_B64_EQU
) /* got '=' */
278 pb
= (((c
& 0x03) << 6) | d
);
279 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
287 i
= PTR2SIZE((char*)p
- out
->s
);
292 in
->l
-= PTR2SIZE(q
- (ui8_t
*)in
->s
);
293 in
->s
= n_UNCONST(q
);
/* Determine the Content-Transfer-Encoding to use for sending.
 * The obsolete variable *encoding* is still looked up (with a n_OBSOLETE()
 * hint) and serves as fallback if *mime-encoding* is unset; with neither set
 * MIME_DEFAULT_ENCODING is used.  Otherwise the value is compared via
 * asccasecmp() against "8b"/"8bit", "b64"/"base64" and
 * "qp"/"quoted-printable"; the n_err() warning about an invalid
 * *mime-encoding* is also visible.
 * NOTE(review): the assignments done by the three comparison branches and
 * the final else are not among the visible lines. */
299 mime_enc_target(void){
300 char const *cp
, *v15
;
304 if((v15
= ok_vlook(encoding
)) != NULL
)
305 n_OBSOLETE(_("please use *mime-encoding* instead of *encoding*"));
307 if((cp
= ok_vlook(mime_encoding
)) == NULL
&& (cp
= v15
) == NULL
)
308 rv
= MIME_DEFAULT_ENCODING
;
309 else if(!asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_S8B_OFF
]) ||
310 !asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_8B_OFF
]))
312 else if(!asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_SB64_OFF
]) ||
313 !asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_B64_OFF
]))
315 else if(!asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_SQP_OFF
]) ||
316 !asccasecmp(cp
, &a_me_ctes
[a_ME_CTES_QP_OFF
]))
319 n_err(_("Warning: invalid *mime-encoding*, using Base64: %s\n"), cp
);
/* Map the body of a Content-Transfer-Encoding: header to a MIMEE_* value.
 * cte_base lists offset and length into a_me_ctes plus the matching enum for
 * "7bit", "8bit", "base64", "quoted-printable" and "binary", terminated by
 * a MIMEE_NONE entry.  The token is delimited by a closing '"' or by the
 * first white space character (the first loop pre-increments hbody,
 * presumably to skip an opening quote); its length is then stored in the
 * same union that held the end pointer.
 * NOTE(review): the match requires equal length AND asccasecmp() of the name
 * against hbody, which is not cut at the token end; if asccasecmp() compares
 * up to the terminating NUL, a value followed by '"' or white space cannot
 * match -- confirm, a length-limited compare may be intended. */
327 mime_enc_from_ctehead(char const *hbody
){
339 } const *cte
, cte_base
[] = {
340 {a_ME_CTES_7B_OFF
, a_ME_CTES_7B_LEN
, MIMEE_7B
, 0},
341 {a_ME_CTES_8B_OFF
, a_ME_CTES_8B_LEN
, MIMEE_8B
, 0},
342 {a_ME_CTES_B64_OFF
, a_ME_CTES_B64_LEN
, MIMEE_B64
, 0},
343 {a_ME_CTES_QP_OFF
, a_ME_CTES_QP_LEN
, MIMEE_QP
, 0},
344 {a_ME_CTES_BIN_OFF
, a_ME_CTES_BIN_LEN
, MIMEE_BIN
, 0},
345 {0, 0, MIMEE_NONE
, 0}
347 union {char const *s
; size_t l
;} u
;
350 for(u
.s
= ++hbody
; *u
.s
!= '\0' && *u
.s
!= '"'; ++u
.s
)
353 for(u
.s
= hbody
; *u
.s
!= '\0' && !whitechar(*u
.s
); ++u
.s
)
355 u
.l
= PTR2SIZE(u
.s
- hbody
);
357 for(cte
= cte_base
;;)
358 if(cte
->len
== u
.l
&& !asccasecmp(&a_me_ctes
[cte
->off
], hbody
)){
361 }else if((++cte
)->enc
== MIMEE_NONE
){
/* Select the Content-Transfer-Encoding name for a conversion type:
 * CONV_7BIT "7bit", CONV_8BIT "8bit", CONV_TOQP "quoted-printable",
 * CONV_TOB64 "base64", CONV_NONE "binary"; any other value yields n_empty.
 * The names are pointers into a_me_ctes. */
371 mime_enc_from_conversion(enum conversion
const convert
){
376 case CONV_7BIT
: rv
= &a_me_ctes
[a_ME_CTES_7B_OFF
]; break;
377 case CONV_8BIT
: rv
= &a_me_ctes
[a_ME_CTES_8B_OFF
]; break;
378 case CONV_TOQP
: rv
= &a_me_ctes
[a_ME_CTES_QP_OFF
]; break;
379 case CONV_TOB64
: rv
= &a_me_ctes
[a_ME_CTES_B64_OFF
]; break;
380 case CONV_NONE
: rv
= &a_me_ctes
[a_ME_CTES_BIN_OFF
]; break;
381 default: rv
= n_empty
; break;
/* Walk the lnlen bytes at ln and classify each via a_me_mustquote(); sol is
 * TRU1 only for the first byte, and ln + lnlen stays the fixed end pointer as
 * ln advances while lnlen shrinks.
 * NOTE(review): the switch cases that build the result rv are not among the
 * visible lines; one of them asserts MIMEEF_ISENCWORD. */
388 mime_enc_mustquote(char const *ln
, size_t lnlen
, enum mime_enc_flags flags
){
393 for(rv
= 0, sol
= TRU1
; lnlen
> 0; sol
= FAL0
, ++ln
, --lnlen
)
394 switch(a_me_mustquote(ln
, ln
+ lnlen
, sol
, flags
)){
398 assert(flags
& MIMEEF_ISENCWORD
);
/* Compute a buffer size that suffices to QP-encode len input bytes, for the
 * worst case described in the comment below.  A len of UIZ_MAX / 3 or more
 * is tested for in two places (see the n_ERR_OVERFLOW remark); qp_encode()
 * treats a result of UIZ_MAX as failure.  Line counts derive from
 * bytes / QP_LINESIZE.
 * NOTE(review): the arithmetic between the visible statements is missing
 * from this listing, and so is the end of the commented-out check. */
410 qp_encode_calc_size(size_t len
){
414 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
415 * However, we must be aware that (a) the output may span multiple lines
416 * and (b) the input does not end with a newline itself (nonetheless):
417 * LC_ALL=C awk 'BEGIN{
418 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
420 * s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
422 /* Several n_ERR_OVERFLOW */
423 if(len
>= UIZ_MAX
/ 3){
428 lines
= bytes
/ QP_LINESIZE
;
431 if(len
>= UIZ_MAX
/ 3){
435 /* Trailing hard NL may be missing, so there may be two lines.
436 * Thus add soft + hard NL per line and a trailing NUL */
438 lines
= (bytes
/ QP_LINESIZE
) + 1;
441 /*if(UIZ_MAX - bytes >= lines){
/* Convenience wrapper: point a struct str at cp and run qp_encode() on it,
 * storing the result of that call back into out.
 * NOTE(review): how in.l is set is not visible; presumably strlen(cp). */
454 qp_encode_cp(struct str
*out
, char const *cp
, enum qpflags flags
){
458 in
.s
= n_UNCONST(cp
);
460 out
= qp_encode(out
, &in
, flags
);
/* Convenience wrapper: run qp_encode() on the buffer vp, storing the result
 * of that call back into out.
 * NOTE(review): the rest of the parameter list (the flags used below) and
 * the assignment of in.l (presumably vp_len) are not among the visible
 * lines. */
466 qp_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
471 in
.s
= n_UNCONST(vp
);
473 out
= qp_encode(out
, &in
, flags
);
/* Quoted-printable encode in into out.
 * Unless QP_BUF is set the output buffer is sized by qp_encode_calc_size()
 * (UIZ_MAX meaning overflow) and obtained from n_autorec_alloc() with
 * QP_SALLOC or from n_realloc() of out->s otherwise.
 * Header mode (QP_ISHEAD): every byte is classified with MIMEEF_ISHEAD, plus
 * MIMEEF_ISENCWORD if QP_ISENCWORD is set; a_ME_US gets its own branch and
 * quoted bytes are written through n_c_to_hex_base16().
 * Body mode: classification uses MIMEEF_NONE, lnlen is counted against
 * QP_LINESIZE to place soft line breaks, and a soft line break is enforced
 * when the input does not end with LF.
 * Finally out->l is set to the number of bytes written and out->s is NUL
 * terminated.
 * NOTE(review): large parts of both loops are not among the visible lines. */
480 qp_encode(struct str
*out
, struct str
const *in
, enum qpflags flags
){
487 sol
= (flags
& QP_ISHEAD
? FAL0
: TRU1
);
489 if(!(flags
& QP_BUF
)){
490 if((lnlen
= qp_encode_calc_size(in
->l
)) == UIZ_MAX
){
494 out
->s
= (flags
& QP_SALLOC
) ? n_autorec_alloc(lnlen
)
495 : n_realloc(out
->s
, lnlen
);
501 if(flags
& QP_ISHEAD
){
502 enum mime_enc_flags ef
;
504 ef
= MIMEEF_ISHEAD
| (flags
& QP_ISENCWORD
? MIMEEF_ISENCWORD
: 0);
506 for(seenx
= FAL0
, sol
= TRU1
; is
< ie
; sol
= FAL0
, ++qp
){
510 mq
= a_me_mustquote(is
, ie
, sol
, ef
);
514 /* We convert into a single *encoded-word*, that'll end up in
515 * =?C?Q??=; quote '?' from when we're inside there on */
516 if(seenx
&& c
== '?')
519 }else if(mq
== a_ME_US
)
525 qp
= n_c_to_hex_base16(qp
, c
) + 1;
531 /* The body needs to take care for soft line breaks etc. */
532 for(lnlen
= 0, seenx
= FAL0
; is
< ie
; sol
= FAL0
){
536 mq
= a_me_mustquote(is
, ie
, sol
, MIMEEF_NONE
);
539 if(mq
== a_ME_N
&& (c
!= '\n' || !seenx
)){
541 if(++lnlen
< QP_LINESIZE
- 1)
543 /* Don't write a soft line break when we're in the last possible
544 * column and either an LF has been written or only an LF follows, as
545 * that'll end the line anyway */
546 /* XXX but - ensure is+1>=ie, then??
547 * xxx and/or - what about resetting lnlen; that contra
548 * xxx dicts input==1 input line assertion, though */
549 if(c
== '\n' || is
== ie
|| is
[0] == '\n' || is
[1] == '\n')
559 if(lnlen
> QP_LINESIZE
- 3 - 1){
566 qp
= n_c_to_hex_base16(qp
, c
);
569 if(c
!= '\n' || !seenx
)
577 /* Enforce soft line break if we haven't seen LF */
578 if(in
->l
> 0 && *--is
!= '\n'){
584 out
->l
= PTR2SIZE(qp
- out
->s
);
585 out
->s
[out
->l
] = '\0';
/* Decode quoted-printable header data from in, appending to out.
 * The size sums are checked against UIZ_MAX and SI32_MAX first.  out's
 * buffer is handed to a n_string via n_string_take_ownership() and room for
 * in->l plus a quarter of it is reserved.  Hex pairs are decoded with
 * n_c_from_hex_base16(); '_' has a dedicated test (a_ME_US); the result is
 * moved back into out via n_string_cp().
 * NOTE(review): ie is set to &is[in->l - 1], which for in->l == 0 points
 * before the buffer -- confirm that empty input is excluded earlier.
 * NOTE(review): "return (out != NULL)" implies that out is set to NULL on an
 * error path which is not among the visible lines. */
592 qp_decode_header(struct str
*out
, struct str
const *in
){
598 if(UIZ_MAX
-1 - out
->l
<= in
->l
||
599 SI32_MAX
<= out
->l
+ in
->l
){ /* XXX wrong, we may replace */
606 n_string_reserve(n_string_take_ownership(&s
, out
->s
,
607 (out
->l
== 0 ? 0 : out
->l
+1), out
->l
),
608 in
->l
+ (in
->l
>> 2));
610 for(is
= in
->s
, ie
= &is
[in
->l
- 1]; is
<= ie
;){
616 goto jpushc
; /* TODO According to RFC 2045, 6.7,
617 * ++is; TODO we should warn the user, but have no context
618 * goto jehead; TODO to do so; can't over and over */
619 }else if((c
= n_c_from_hex_base16(is
)) >= 0){
623 /* Invalid according to RFC 2045, section 6.7 */
624 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
628 * TODO if(n_psonce & n_PSO_UNICODE)
629 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
637 if(c
== '_' /* a_ME_US */)
639 n_string_push_c(&s
, (char)c
);
643 out
->s
= n_string_cp(&s
);
645 n_string_gut(n_string_drop_ownership(&s
));
648 return (out
!= NULL
);
/* Decode one chunk of a quoted-printable body.
 * out receives decoded data; outrest and inrest_or_null carry data over to
 * a later call.  Bytes other than '=' are pushed verbatim; blanks are skipped
 * (see the RFC 2045 quote) before soft line breaks are looked for, else a hex
 * pair is decoded via n_c_from_hex_base16().  Whether the decoded data ends
 * with LF is tested afterwards; one visible path hands the n_string buffer
 * to outrest instead of out.  Leftover input is stored in inrest_or_null via
 * n_str_assign_buf().
 * NOTE(review): ie = &is[in->l - 1] points before the buffer for in->l == 0;
 * confirm that empty input is handled before the loop.
 * NOTE(review): many statements are not among the visible lines, including
 * what happens when inrest_or_null is NULL while input is left over, and the
 * error path that makes "return (out != NULL)" yield false. */
652 qp_decode_part(struct str
*out
, struct str
const *in
, struct str
*outrest
,
653 struct str
*inrest_or_null
){
654 struct n_string s
, *sp
;
661 outrest
->s
= n_UNCONST(is
);
666 if(UIZ_MAX
-1 - out
->l
<= in
->l
||
667 SI32_MAX
<= out
->l
+ in
->l
) /* XXX wrong, we may replace */
670 sp
= n_string_creat(&s
);
671 sp
= n_string_take_ownership(sp
, out
->s
,
672 (out
->l
== 0 ? 0 : out
->l
+1), out
->l
);
673 sp
= n_string_reserve(sp
, in
->l
+ (in
->l
>> 2));
675 for(is
= in
->s
, ie
= &is
[in
->l
- 1]; is
<= ie
;){
678 if((c
= *is
++) != '='){
680 n_string_push_c(sp
, (char)c
);
685 * Therefore, when decoding a Quoted-Printable body, any
686 * trailing white space on a line must be deleted, as it will
687 * necessarily have been added by intermediate transport
689 for(; is
<= ie
&& blankchar(*is
); ++is
)
692 /* Soft line break? */
695 goto jpushc
; /* TODO According to RFC 2045, 6.7,
696 * ++is; TODO we should warn the user, but have no context
697 * goto jebody; TODO to do so; can't over and over */
700 /* Not a soft line break? */
702 if((c
= n_c_from_hex_base16(is
)) >= 0){
706 /* Invalid according to RFC 2045, section 6.7 */
707 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
711 * TODO if(n_psonce & n_PSO_UNICODE)
712 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
719 /* CRLF line endings are encoded as QP, followed by a soft line break, so
720 * check for this special case, and simply forget we have seen one, so as
721 * not to end up with the entire DOS file in a contiguous buffer */
723 if(sp
->s_len
> 0 && sp
->s_dat
[sp
->s_len
- 1] == '\n'){
724 #if 0 /* TODO qp_decode_part() we do not normalize CRLF
725 * TODO to LF because for that we would need
726 * TODO to know if we are about to write to
727 * TODO the display or do save the file!
728 * TODO 'hope the MIME/send layer rewrite will
729 * TODO offer the possibility to DTRT */
730 if(sp
->s_len
> 1 && sp
->s_dat
[sp
->s_len
- 2] == '\r')
731 n_string_push_c(n_string_trunc(sp
, sp
->s_len
- 2), '\n');
740 if((l
= PTR2SIZE(ie
- is
)) > 0){
741 if(inrest_or_null
== NULL
)
743 n_str_assign_buf(inrest_or_null
, is
, l
);
746 outrest
->s
= n_string_cp(sp
);
747 outrest
->l
= s
.s_len
;
748 n_string_drop_ownership(sp
);
755 out
->s
= n_string_cp(sp
);
757 n_string_gut(n_string_drop_ownership(sp
));
760 return (out
!= NULL
);
/* Compute the buffer size b64_encode() needs for len input bytes; a len of
 * UIZ_MAX / 4 or more takes the overflow branch (b64_encode() treats a
 * result of UIZ_MAX as failure).  Visible additions: three bytes for each of
 * (len / B64_ENCODE_INPUT_PER_LINE) + 1 lines, then 2 + 1 for CRLF and NUL.
 * NOTE(review): the statement that scales len to the encoded length is not
 * among the visible lines (embedded numbers jump 770 -> 774). */
768 b64_encode_calc_size(size_t len
){
770 if(len
>= UIZ_MAX
/ 4)
774 len
+= (((len
/ B64_ENCODE_INPUT_PER_LINE
) + 1) * 3);
775 len
+= 2 + 1; /* CRLF, \0 */
/* Base64 encode in into out.
 * B64_NOPAD must not be combined with B64_CRLF, B64_LF or B64_MULTILINE
 * (asserted).  Unless B64_BUF is set the buffer is sized via
 * b64_encode_calc_size() (UIZ_MAX meaning overflow) and comes from
 * n_autorec_alloc() with B64_SALLOC, else from n_realloc() of out->s.
 * B64_MULTILINE is dropped if neither B64_CRLF nor B64_LF is given.  Input is
 * consumed three bytes per step and mapped through a_me_b64_enctbl; with
 * B64_NOPAD trailing '=' are walked back over.  out->l is set and out->s is
 * NUL terminated, after which B64_RFC4648URL post-processes the result in
 * place (see the comment there).
 * NOTE(review): padding, line breaking and the URL replacement statements
 * are not among the visible lines. */
782 b64_encode(struct str
*out
, struct str
const *in
, enum b64flags flags
){
788 assert(!(flags
& B64_NOPAD
) ||
789 !(flags
& (B64_CRLF
| B64_LF
| B64_MULTILINE
)));
791 p
= (ui8_t
const*)in
->s
;
793 if(!(flags
& B64_BUF
)){
794 if((i
= b64_encode_calc_size(in
->l
)) == UIZ_MAX
){
798 out
->s
= (flags
& B64_SALLOC
) ? n_autorec_alloc(i
)
799 : n_realloc(out
->s
, i
);
803 if(!(flags
& (B64_CRLF
| B64_LF
)))
804 flags
&= ~B64_MULTILINE
;
806 for(lnlen
= 0, i
= in
->l
; (ssize_t
)i
> 0; p
+= 3, i
-= 3){
810 b64
[0] = a_me_b64_enctbl
[a
>> 2];
814 b64
[1] = a_me_b64_enctbl
[((a
& 0x3) << 4)];
820 b64
[1] = a_me_b64_enctbl
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
821 b64
[2] = a_me_b64_enctbl
[((b
& 0x0F) << 2)];
827 b64
[1] = a_me_b64_enctbl
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
828 b64
[2] = a_me_b64_enctbl
[((b
& 0x0F) << 2) | ((c
& 0xC0u
) >> 6)];
829 b64
[3] = a_me_b64_enctbl
[c
& 0x3F];
834 if(!(flags
& B64_MULTILINE
))
837 if(lnlen
< B64_LINESIZE
)
843 if(flags
& (B64_CRLF
| B64_LF
))
847 if((flags
& (B64_CRLF
| B64_LF
)) &&
848 (!(flags
& B64_MULTILINE
) || lnlen
!= 0)){
851 if(flags
& (B64_CRLF
| B64_LF
))
853 }else if(flags
& B64_NOPAD
)
854 while(b64
!= out
->s
&& b64
[-1] == '=')
857 out
->l
= PTR2SIZE(b64
- out
->s
);
858 out
->s
[out
->l
] = '\0';
860 /* Base64 includes + and /, replace them with _ and -.
861 * This is base64url according to RFC 4648, then. Since we only support
862 * that for encoding and it is only used for boundary strings, this is
863 * yet a primitive implementation; xxx use tables; support decoding */
864 if(flags
& B64_RFC4648URL
){
867 for(b64
= out
->s
; (c
= *b64
) != '\0'; ++b64
)
/* Convenience wrapper: run b64_encode() on the buffer vp, storing the result
 * of that call back into out.
 * NOTE(review): the assignment of in.l (presumably vp_len) is not among the
 * visible lines. */
879 b64_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
880 enum b64flags flags
){
884 in
.s
= n_UNCONST(vp
);
886 out
= b64_encode(out
, &in
, flags
);
/* Convenience wrapper: point a struct str at cp and run b64_encode() on it,
 * storing the result of that call back into out.
 * NOTE(review): how in.l is set is not visible; presumably strlen(cp). */
893 b64_encode_cp(struct str
*out
, char const *cp
, enum b64flags flags
){
897 in
.s
= n_UNCONST(cp
);
899 out
= b64_encode(out
, &in
, flags
);
/* Decode the complete Base64 data in in into out.
 * a_me_b64_decode_prepare() trims the input and yields the buffer size
 * (UIZ_MAX on overflow).  Input whose trimmed length is at least four and a
 * multiple of four is decoded into a buffer of len + 1 bytes obtained via
 * n_realloc(); a negative result of a_me_b64_decode() is tested for.  out->s
 * is NUL terminated at out->l.
 * NOTE(review): "return (out != NULL)" implies error paths that set out to
 * NULL; they and the empty-input test are not among the visible lines. */
906 b64_decode(struct str
*out
, struct str
const *in
){
913 if((len
= a_me_b64_decode_prepare(&work
, in
)) == UIZ_MAX
)
916 /* Ignore an empty input, as may happen for an empty final line */
918 out
->s
= n_realloc(out
->s
, 1);
919 else if(work
.l
>= 4 && !(work
.l
& 3)){
920 out
->s
= n_realloc(out
->s
, len
+1);
921 if((ssize_t
)(len
= a_me_b64_decode(out
, &work
)) < 0)
925 out
->s
[out
->l
] = '\0';
928 return (out
!= NULL
);
/* Decode Base64 header data: if plain b64_decode() fails, retry via
 * b64_decode_part() with zeroed rest buffers; that retry is tested for
 * failure and for anything left in outr or inr.
 * NOTE(review): what the retry test guards, the cleanup of outr / inr and
 * the path that sets out to NULL are not among the visible lines. */
935 b64_decode_header(struct str
*out
, struct str
const *in
){
936 struct str outr
, inr
;
939 if(!b64_decode(out
, in
)){
940 memset(&outr
, 0, sizeof outr
);
941 memset(&inr
, 0, sizeof inr
);
943 if(!b64_decode_part(out
, in
, &outr
, &inr
) || outr
.l
> 0 || inr
.l
> 0)
952 return (out
!= NULL
);
/* Decode one chunk of Base64 data, tolerating invalid input.
 * out's existing buffer is adopted by the n_string s if it is NUL terminated
 * at out->l (a push of the same data is also visible, presumably the
 * alternative), and outrest's content is appended.  After size checks
 * against UIZ_MAX and SI32_MAX the fast path runs a_me_b64_decode() directly
 * into s; a later visible statement truncates s back to its former length
 * (b64l), after which the input is walked byte by byte: for invalid
 * characters n_unirepl is pushed if n_PSO_UNICODE is set, and a push of '?'
 * is visible too (presumably the alternative); decoded bytes are pushed
 * unless they are '\r' and n_PS_BASE64_STRIP_CR is set.  An incomplete
 * quadruple is saved in inrest_or_null (up to three characters plus NUL).
 * NOTE(review): out->s[len] is read to decide whether the buffer can be
 * adopted, i.e. out->s is presumed to hold at least out->l + 1 bytes.
 * NOTE(review): the state machine of the slow path, the handling of a NULL
 * inrest_or_null and the error paths are only partly visible. */
956 b64_decode_part(struct str
*out
, struct str
const *in
, struct str
*outrest
,
957 struct str
*inrest_or_null
){
958 struct str work
, save
;
959 ui32_t a
, b
, c
, b64l
;
961 struct n_string s
, workbuf
;
966 if((len
= out
->l
) > 0 && out
->s
[len
] == '\0')
967 (void)n_string_take_ownership(&s
, out
->s
, len
+1, len
);
970 n_string_push_buf(&s
, out
->s
, len
);
974 out
->s
= NULL
, out
->l
= 0;
975 n_string_creat(&workbuf
);
977 if((len
= a_me_b64_decode_prepare(&work
, in
)) == UIZ_MAX
)
981 n_string_push_buf(&s
, outrest
->s
, outrest
->l
);
986 if(UIZ_MAX
- len
<= s
.s_len
||
987 SI32_MAX
<= len
+ s
.s_len
) /* XXX wrong, we may replace */
993 /* This text decoder is extremely expensive, especially given that in all
994 * but _invalid_ cases it is not even needed! So try once to do the normal
995 * decoding, if that fails, go the hard way */
997 out
->s
= n_string_resize(&s
, len
+ (out
->l
= b64l
= s
.s_len
))->s_dat
;
999 if(work
.l
>= 4 && a_me_b64_decode(out
, &work
) >= 0){
1000 n_string_trunc(&s
, out
->l
);
1005 n_string_trunc(&s
, b64l
);
1007 out
->s
= NULL
, out
->l
= 0;
1009 /* TODO b64_decode_part() does not yet STOP if it sees padding, whereas
1010 * TODO OpenSSL and mutt simply bail on such stuff */
1017 x
= a_ME_B64_DECUI8((ui8_t
)(cx
= *work
.s
));
1020 if(x
>= a_ME_B64_EQU
)
1027 if(x
>= a_ME_B64_EQU
)
1034 if(x
== a_ME_B64_BAD
)
1041 if(x
== a_ME_B64_BAD
){
1043 /* TODO This would be wrong since iconv(3) may be applied first! */
1045 if(n_psonce
& n_PSO_UNICODE
)
1046 n_string_push_buf(&s
, n_unirepl
, sizeof(n_unirepl
) -1);
1048 n_string_push_c(&s
, '?');
1051 }else if(c
== a_ME_B64_EQU
&& x
!= a_ME_B64_EQU
){
1052 /* This is not only invalid but bogus. Skip it over! */
1053 /* TODO This would be wrong since iconv(3) may be applied first! */
1055 n_string_push_buf(&s
, n_UNIREPL n_UNIREPL n_UNIREPL n_UNIREPL
,
1056 (sizeof(n_UNIREPL
) -1) * 4);
1062 pb
= ((a
<< 2) | ((b
& 0x30) >> 4));
1063 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
1064 n_string_push_c(&s
, (char)pb
);
1065 pb
= (((b
& 0x0F) << 4) | ((c
& 0x3C) >> 2));
1066 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
1067 n_string_push_c(&s
, (char)pb
);
1068 if(x
!= a_ME_B64_EQU
){
1069 pb
= (((c
& 0x03) << 6) | x
);
1070 if(pb
!= (ui8_t
)'\r' || !(n_pstate
& n_PS_BASE64_STRIP_CR
))
1071 n_string_push_c(&s
, (char)pb
);
1080 if(b64l
> 0 && b64l
!= 4){
1081 if(inrest_or_null
== NULL
)
1083 inrest_or_null
->s
= n_realloc(inrest_or_null
->s
, b64l
+1);
1084 inrest_or_null
->s
[0] = ca
;
1086 inrest_or_null
->s
[1] = cb
;
1088 inrest_or_null
->s
[2] = cc
;
1089 inrest_or_null
->s
[inrest_or_null
->l
= b64l
] = '\0';
1098 out
->s
= n_string_cp(&s
);
1100 n_string_drop_ownership(&s
);
1102 n_string_gut(&workbuf
);
1105 return (out
!= NULL
);