1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045:
3 *@ - Quoted-Printable, section 6.7
4 *@ - Base64, section 6.8
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2013 Steffen "Daode" Nurpmeso <sdaoden@users.sf.net>.
9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
12 * Copyright (c) 2006 The NetBSD Foundation, Inc.
13 * All rights reserved.
15 * This code is derived from software contributed to The NetBSD Foundation
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
44 N
= 0, /* Do not quote */
45 Q
= 1, /* Must quote */
47 XF
= 3, /* Special character 'F' - maybe quoted */
48 XD
= 4, /* Special character '.' - maybe quoted */
49 US
= '_', /* In header, special character ' ' quoted as '_' */
50 QM
= '?', /* In header, special character ? not always quoted */
51 EQ
= Q
, /* '=' must be quoted */
52 TB
= SP
, /* Treat '\t' as a space */
53 NL
= N
, /* Don't quote '\n' (NL) */
54 CR
= Q
/* Always quote a '\r' (CR) */
57 /* Lookup tables to decide whether a character must be encoded or not.
58 * Email header differences according to RFC 2047, section 4.2:
59 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
60 * - don't care about the special ^F[rom] and ^.$ */
61 static uc_it
const _qtab_body
[] = {
62 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,TB
,NL
, Q
, Q
,CR
, Q
, Q
,
63 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
64 SP
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,XD
, N
,
65 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
, N
,
67 N
, N
, N
, N
, N
, N
,XF
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
68 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
69 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
70 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
73 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
74 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
75 US
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
76 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
,QM
,
78 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
79 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
80 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
81 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
84 /* Check whether **s* must be quoted according to header rules if *ishead*, else body rules;
85 * *sol* indicates whether we are at the first character of a line/field */
86 SINLINE
enum _qact
_mustquote(char const *s
, char const *e
, bool_t sol
,
89 /* Convert c to/from a hexadecimal character string */
90 SINLINE
char * _qp_ctohex(char *store
, char c
);
91 SINLINE si_it
_qp_cfromhex(char const *hex
);
93 /* Trim WS and make *work* point to the decodable range of *in*.
94 * Return the amount of bytes a b64_decode operation on that buffer requires */
95 static size_t _b64_decode_prepare(struct str
*work
,
96 struct str
const *in
);
98 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base *in*put.
99 * Return number of useful bytes in *out* or -1 on error */
100 static ssize_t
_b64_decode(struct str
*out
, struct str
*in
);
103 _mustquote(char const *s
, char const *e
, bool_t sol
, bool_t ishead
)
105 uc_it
const *qtab
= ishead
? _qtab_head
: _qtab_body
;
106 enum _qact a
= ((uc_it
)*s
> 0x7F) ? Q
: qtab
[(uc_it
)*s
], r
;
108 if ((r
= a
) == N
|| (r
= a
) == Q
)
112 /* Special header fields */
119 /* Treat '?' only special if part of '=?' and '?=' (still too
120 * much quoting since it's '=?CHARSET?CTE?stuff?=', and
121 * especially the trailing ?= should be hard to match ,) */
122 if (a
== QM
&& ((! sol
&& s
[-1] == '=') ||
123 (s
< e
&& s
[1] == '=')))
131 /* WS only if trailing white space */
132 if (s
+ 1 == e
|| s
[1] == '\n')
137 /* Rest are special begin-of-line cases */
143 if (s
+ 4 < e
&& s
[1] == 'r' && s
[2] == 'o' && s
[3] == 'm')
148 if (a
== XD
&& (s
+ 1 == e
|| s
[1] == '\n'))
157 _qp_ctohex(char *store
, char c
)
159 static char const hexmap
[] = "0123456789ABCDEF";
162 store
[1] = hexmap
[(uc_it
)c
& 0x0F];
163 c
= ((uc_it
)c
>> 4) & 0x0F;
164 store
[0] = hexmap
[(uc_it
)c
];
169 _qp_cfromhex(char const *hex
)
171 /* Be robust, allow lowercase hexadecimal letters, too */
172 static uc_it
const atoi16
[] = {
173 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
174 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
175 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
176 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
178 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
179 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF /* 0x60-0x67 */
184 if ((i1
= (uc_it
)hex
[0] - '0') >= NELEM(atoi16
) ||
185 (i2
= (uc_it
)hex
[1] - '0') >= NELEM(atoi16
))
189 if ((i1
| i2
) & 0xF0)
202 _b64_decode_prepare(struct str
*work
, struct str
const *in
)
205 size_t cp_len
= in
->l
;
207 while (cp_len
> 0 && spacechar(*cp
))
211 for (cp
+= cp_len
; cp_len
> 0; --cp_len
) {
219 cp_len
= ((cp_len
* 3) >> 2) + (cp_len
>> 3);
224 _b64_decode(struct str
*out
, struct str
*in
)
226 static signed char const b64index
[] = {
227 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
228 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
229 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
230 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
231 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
232 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
233 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
234 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
236 #define EQU (ui_it)-2
237 #define BAD (ui_it)-1
238 #define uchar64(c) ((c) >= sizeof(b64index) ? BAD : (ui_it)b64index[(c)])
241 uc_it
*p
= (uc_it
*)out
->s
;
242 uc_it
const *q
= (uc_it
const*)in
->s
, *end
;
246 for (end
= q
+ in
->l
; q
+ 4 <= end
; q
+= 4) {
247 ui_it a
= uchar64(q
[0]), b
= uchar64(q
[1]), c
= uchar64(q
[2]),
250 if (a
>= EQU
|| b
>= EQU
|| c
== BAD
|| d
== BAD
)
253 *p
++ = ((a
<< 2) | ((b
& 0x30) >> 4));
254 if (c
== EQU
) { /* got '=' */
259 *p
++ = (((b
& 0x0f) << 4) | ((c
& 0x3c) >> 2));
260 if (d
== EQU
) /* got '=' */
262 *p
++ = (((c
& 0x03) << 6) | d
);
268 ret
= (size_t)((char*)p
- out
->s
);
269 out
->l
= (size_t)ret
;
271 in
->l
-= (size_t)((char*)UNCONST(q
) - in
->s
);
277 mime_cte_mustquote(char const *ln
, size_t lnlen
, bool_t ishead
)
282 for (ret
= 0, sol
= TRU1
; lnlen
> 0; sol
= FAL0
, ++ln
, --lnlen
)
283 ret
+= (_mustquote(ln
, ln
+ lnlen
, sol
, ishead
) != N
);
288 qp_encode_calc_size(size_t len
)
290 /* Worst case: 'CRLF' -> '=0D=0A=' */
291 len
= len
* 3 + (len
>> 1) + 1;
296 qp_encode_cp(struct str
*out
, char const *cp
, enum qpflags flags
)
301 return qp_encode(out
, &in
, flags
);
305 qp_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
311 return qp_encode(out
, &in
, flags
);
315 qp_encode(struct str
*out
, struct str
const *in
, enum qpflags flags
)
317 bool_t sol
= (flags
& QP_ISHEAD
? FAL0
: TRU1
), seenx
;
322 if ((flags
& QP_BUF
) == 0) {
323 lnlen
= qp_encode_calc_size(in
->l
);
324 out
->s
= (flags
& QP_SALLOC
) ? salloc(lnlen
)
325 : srealloc(out
->s
, lnlen
);
333 for (seenx
= FAL0
, sol
= TRU1
; is
< ie
; sol
= FAL0
, ++qp
) {
334 enum _qact mq
= _mustquote(is
, ie
, sol
, TRU1
);
338 /* We convert into a single *encoded-word*,
339 * that'll end up in =?C?Q??=; quote '?' from
340 * the moment when we're inside there on */
341 if (seenx
&& c
== '?')
350 qp
= _qp_ctohex(qp
, c
) + 1;
356 /* The body needs to take care for soft line breaks etc. */
357 for (lnlen
= 0, seenx
= FAL0
; is
< ie
; sol
= FAL0
) {
358 enum _qact mq
= _mustquote(is
, ie
, sol
, FAL0
);
361 if (mq
== N
&& (c
!= '\n' || ! seenx
)) {
363 if (++lnlen
< QP_LINESIZE
- 1 -1)
365 /* Don't write a soft line break when we're in the last
366 * possible column and either an LF has been written or
367 * only an LF follows, as that'll end the line anyway */
368 /* XXX but - ensure is+1>=ie, then??
369 * xxx and/or - what about resetting lnlen; that
370 * xxx contradicts input==1 input line assertion, though */
371 if (c
== '\n' || is
== ie
|| *is
== '\n')
381 if (lnlen
> QP_LINESIZE
- 3 - 1 -1) {
388 qp
= _qp_ctohex(qp
, c
);
391 if (c
!= '\n' || ! seenx
)
399 /* Enforce soft line break if we haven't seen LF */
400 if (in
->l
> 0 && *--is
!= '\n') {
406 out
->l
= (size_t)(qp
- out
->s
);
407 out
->s
[out
->l
] = '\0';
412 qp_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
418 if (rest
!= NULL
&& rest
->l
!= 0) {
426 out
->s
= srealloc(out
->s
, out
->l
+ in
->l
+ 3);
431 /* Decoding encoded-word (RFC 2047) in a header field? */
440 c
= _qp_cfromhex(is
);
445 /* Illegal according to RFC 2045,
446 * section 6.7. Almost follow it */
448 oc
[0] = '['; oc
[1] = '?'; oc
[2] = ']';
452 *oc
++ = (c
== '_') ? ' ' : (char)c
;
454 goto jleave
; /* XXX QP decode, header: errors not reported */
457 /* Decoding a complete message/mimepart body line */
467 * Therefore, when decoding a Quoted-Printable body, any
468 * trailing white space on a line must be deleted, as it will
469 * necessarily have been added by intermediate transport
472 for (; is
< ie
&& blankchar(*is
); ++is
)
475 /* Soft line break? */
482 /* Not a soft line break? */
484 c
= _qp_cfromhex(is
);
489 /* Illegal according to RFC 2045, section 6.7.
490 * Rather follow it and include the = and the
493 oc
[0] = '['; oc
[1] = '?'; oc
[2] = ']';
499 /* CRLF line endings are encoded as QP, followed by a soft line
500 * break, so check for this special case, and simply forget we
501 * have seen one, so as not to end up with the entire DOS file
502 * in a contiguous buffer */
504 if (oc
> os
&& oc
[-1] == '\n') {
505 #if 0 /* TODO qp_decode() we do not normalize CRLF
506 * TODO to LF because for that we would need
507 * TODO to know if we are about to write to
508 * TODO the display or do save the file!
508 * TODO the display or to save the file!
510 * TODO offer the possibility to DTRT */
511 if (oc
- 1 > os
&& oc
[-2] == '\r') {
518 out
->l
= (size_t)(oc
- os
);
519 rest
->s
= srealloc(rest
->s
, rest
->l
+ out
->l
);
520 memcpy(rest
->s
+ rest
->l
, out
->s
, out
->l
);
525 /* XXX RFC: QP decode should check no trailing WS on line */
527 out
->l
= (size_t)(oc
- os
);
533 b64_encode_calc_size(size_t len
)
536 len
+= (((len
/ B64_ENCODE_INPUT_PER_LINE
) + 1) * 3);
542 b64_encode(struct str
*out
, struct str
const *in
, enum b64flags flags
)
544 static char const b64table
[] =
545 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
546 uc_it
const *p
= (uc_it
const*)in
->s
;
547 ssize_t i
= b64_encode_calc_size(in
->l
), lnlen
;
550 if ((flags
& B64_BUF
) == 0)
551 out
->s
= (flags
& B64_SALLOC
) ? salloc(i
) : srealloc(out
->s
, i
);
554 if (! (flags
& (B64_CRLF
|B64_LF
)))
555 flags
&= ~B64_MULTILINE
;
557 for (lnlen
= 0, i
= (ssize_t
)in
->l
; i
> 0; p
+= 3, i
-= 3) {
558 ui_it a
= p
[0], b
, c
;
560 b64
[0] = b64table
[a
>> 2];
563 b64
[1] = b64table
[((a
& 0x3) << 4)];
569 b64
[1] = b64table
[((a
& 0x3) << 4) | ((b
& 0xf0) >> 4)];
570 b64
[2] = b64table
[((b
& 0xf) << 2)];
576 b64
[1] = b64table
[((a
& 0x3) << 4) | ((b
& 0xf0) >> 4)];
577 b64
[2] = b64table
[((b
& 0xf) << 2) | ((c
& 0xc0) >> 6)];
578 b64
[3] = b64table
[c
& 0x3f];
583 if (! (flags
& B64_MULTILINE
))
586 if (lnlen
< B64_LINESIZE
- 1)
590 if (flags
& B64_CRLF
)
592 if (flags
& (B64_CRLF
|B64_LF
))
596 if ((flags
& (B64_CRLF
|B64_LF
)) != 0 &&
597 ((flags
& B64_MULTILINE
) == 0 || lnlen
!= 0)) {
598 if (flags
& B64_CRLF
)
600 if (flags
& (B64_CRLF
|B64_LF
))
603 out
->l
= (size_t)(b64
- out
->s
);
604 out
->s
[out
->l
] = '\0';
609 b64_encode_cp(struct str
*out
, char const *cp
, enum b64flags flags
)
614 return b64_encode(out
, &in
, flags
);
618 b64_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
624 return b64_encode(out
, &in
, flags
);
628 b64_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
633 size_t len
= _b64_decode_prepare(&work
, in
);
635 /* Ignore an empty input, as may happen for an empty final line */
637 /* In B64_T cases there may be leftover decoded data for
638 * iconv(3) though; even if that means it's an incomplete
639 * multibyte character, we have to copy it over */
640 /* XXX strictly speaking this should not be handled in here,
641 * XXX since it's leftover decoded data from an iconv(3);
642 * XXX like this we share the prototype with QP, though?? */
643 if (rest
!= NULL
&& rest
->l
> 0) {
653 if (work
.l
>= 4 && (work
.l
& 3) == 0) {
654 out
->s
= srealloc(out
->s
, len
);
657 if (ret
!= OKAY
|| (ssize_t
)(len
= _b64_decode(out
, &work
)) < 0)
663 char const *err
= tr(15, "[Invalid Base64 encoding ignored]\n");
665 x
= out
->s
= srealloc(out
->s
, len
+ 2);
666 if (rest
!= NULL
&& rest
->l
)
671 out
->l
= (size_t)(x
- out
->s
);