1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045:
3 *@ - Quoted-Printable, section 6.7
4 *@ - Base64, section 6.8
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2013 Steffen "Daode" Nurpmeso <sdaoden@users.sf.net>.
9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSD's mailx(1): */
10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
12 * Copyright (c) 2006 The NetBSD Foundation, Inc.
13 * All rights reserved.
15 * This code is derived from software contributed to The NetBSD Foundation
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
40 #ifndef HAVE_AMALGAMATION
45 N
= 0, /* Do not quote */
46 Q
= 1, /* Must quote */
48 XF
= 3, /* Special character 'F' - maybe quoted */
49 XD
= 4, /* Special character '.' - maybe quoted */
50 US
= '_', /* In header, special character ' ' quoted as '_' */
51 QM
= '?', /* In header, special character ? not always quoted */
52 EQ
= Q
, /* '=' must be quoted */
53 TB
= SP
, /* Treat '\t' as a space */
54 NL
= N
, /* Don't quote '\n' (NL) */
55 CR
= Q
/* Always quote a '\r' (CR) */
58 /* Lookup tables to decide whether a character must be encoded or not.
59 * Email header differences according to RFC 2047, section 4.2:
60 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
61 * - don't care about the special ^F[rom] and ^.$ */
62 static uc_it
const _qtab_body
[] = {
63 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,TB
,NL
, Q
, Q
,CR
, Q
, Q
,
64 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
65 SP
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,XD
, N
,
66 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
, N
,
68 N
, N
, N
, N
, N
, N
,XF
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
69 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
70 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
71 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
74 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
75 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
76 US
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
77 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
,QM
,
79 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
80 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
81 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
82 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
85 /* Check whether **s* must be quoted according to *ishead*, else body rules;
86 * *sol* indicates whether we are at the first character of a line/field */
87 SINLINE
enum _qact
_mustquote(char const *s
, char const *e
, bool_t sol
,
90 /* Convert c to/from a hexadecimal character string */
91 SINLINE
char * _qp_ctohex(char *store
, char c
);
92 SINLINE si_it
_qp_cfromhex(char const *hex
);
94 /* Trim WS and make *work* point to the decodable range of *in*.
95 * Return the amount of bytes a b64_decode operation on that buffer requires */
96 static size_t _b64_decode_prepare(struct str
*work
,
97 struct str
const *in
);
99 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base *in*put.
100 * Return number of useful bytes in *out* or -1 on error */
101 static ssize_t
_b64_decode(struct str
*out
, struct str
*in
);
104 _mustquote(char const *s
, char const *e
, bool_t sol
, bool_t ishead
)
106 uc_it
const *qtab
= ishead
? _qtab_head
: _qtab_body
;
107 enum _qact a
= ((uc_it
)*s
> 0x7F) ? Q
: qtab
[(uc_it
)*s
], r
;
109 if ((r
= a
) == N
|| (r
= a
) == Q
)
113 /* Special header fields */
120 /* Treat '?' as special only if part of '=?' and '?=' (still too
121 * much quoting since it's '=?CHARSET?CTE?stuff?=', and
122 * especially the trailing ?= should be hard to match ,) */
123 if (a
== QM
&& ((! sol
&& s
[-1] == '=') ||
124 (s
< e
&& s
[1] == '=')))
132 /* WS only if trailing white space */
133 if (s
+ 1 == e
|| s
[1] == '\n')
138 /* Rest are special begin-of-line cases */
144 if (s
+ 4 < e
&& s
[1] == 'r' && s
[2] == 'o' && s
[3] == 'm')
149 if (a
== XD
&& (s
+ 1 == e
|| s
[1] == '\n'))
158 _qp_ctohex(char *store
, char c
)
160 static char const hexmap
[] = "0123456789ABCDEF";
163 store
[1] = hexmap
[(uc_it
)c
& 0x0F];
164 c
= ((uc_it
)c
>> 4) & 0x0F;
165 store
[0] = hexmap
[(uc_it
)c
];
170 _qp_cfromhex(char const *hex
)
172 /* Be robust, allow lowercase hexadecimal letters, too */
173 static uc_it
const atoi16
[] = {
174 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
175 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
176 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
178 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
179 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
180 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF /* 0x60-0x67 */
185 if ((i1
= (uc_it
)hex
[0] - '0') >= NELEM(atoi16
) ||
186 (i2
= (uc_it
)hex
[1] - '0') >= NELEM(atoi16
))
190 if ((i1
| i2
) & 0xF0)
203 _b64_decode_prepare(struct str
*work
, struct str
const *in
)
206 size_t cp_len
= in
->l
;
208 while (cp_len
> 0 && spacechar(*cp
))
212 for (cp
+= cp_len
; cp_len
> 0; --cp_len
) {
220 cp_len
= ((cp_len
* 3) >> 2) + (cp_len
>> 3);
225 _b64_decode(struct str
*out
, struct str
*in
)
227 static signed char const b64index
[] = {
228 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
229 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
230 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
231 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
232 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
233 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
234 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
235 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
237 #define EQU (ui_it)-2
238 #define BAD (ui_it)-1
239 #define uchar64(c) ((c) >= sizeof(b64index) ? BAD : (ui_it)b64index[(c)])
242 uc_it
*p
= (uc_it
*)out
->s
;
243 uc_it
const *q
= (uc_it
const*)in
->s
, *end
;
247 for (end
= q
+ in
->l
; q
+ 4 <= end
; q
+= 4) {
248 ui_it a
= uchar64(q
[0]), b
= uchar64(q
[1]), c
= uchar64(q
[2]),
251 if (a
>= EQU
|| b
>= EQU
|| c
== BAD
|| d
== BAD
)
254 *p
++ = ((a
<< 2) | ((b
& 0x30) >> 4));
255 if (c
== EQU
) { /* got '=' */
260 *p
++ = (((b
& 0x0f) << 4) | ((c
& 0x3c) >> 2));
261 if (d
== EQU
) /* got '=' */
263 *p
++ = (((c
& 0x03) << 6) | d
);
269 ret
= (size_t)((char*)p
- out
->s
);
270 out
->l
= (size_t)ret
;
272 in
->l
-= (size_t)((char*)UNCONST(q
) - in
->s
);
278 mime_cte_mustquote(char const *ln
, size_t lnlen
, bool_t ishead
)
283 for (ret
= 0, sol
= TRU1
; lnlen
> 0; sol
= FAL0
, ++ln
, --lnlen
)
284 ret
+= (_mustquote(ln
, ln
+ lnlen
, sol
, ishead
) != N
);
289 qp_encode_calc_size(size_t len
)
291 /* Worst case: 'CRLF' -> '=0D=0A=\n\0' */
292 len
= (len
* 3) + 1/* soft NL */ + 1/* visual NL */ + 1/* NUL */;
298 qp_encode_cp(struct str
*out
, char const *cp
, enum qpflags flags
)
303 return qp_encode(out
, &in
, flags
);
307 qp_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
313 return qp_encode(out
, &in
, flags
);
318 qp_encode(struct str
*out
, struct str
const *in
, enum qpflags flags
)
320 bool_t sol
= (flags
& QP_ISHEAD
? FAL0
: TRU1
), seenx
;
325 if ((flags
& QP_BUF
) == 0) {
326 lnlen
= qp_encode_calc_size(in
->l
);
327 out
->s
= (flags
& QP_SALLOC
) ? salloc(lnlen
)
328 : srealloc(out
->s
, lnlen
);
336 for (seenx
= FAL0
, sol
= TRU1
; is
< ie
; sol
= FAL0
, ++qp
) {
337 enum _qact mq
= _mustquote(is
, ie
, sol
, TRU1
);
341 /* We convert into a single *encoded-word*,
342 * that'll end up in =?C?Q??=; quote '?' from
343 * the moment when we're inside there on */
344 if (seenx
&& c
== '?')
353 qp
= _qp_ctohex(qp
, c
) + 1;
359 /* The body needs to take care for soft line breaks etc. */
360 for (lnlen
= 0, seenx
= FAL0
; is
< ie
; sol
= FAL0
) {
361 enum _qact mq
= _mustquote(is
, ie
, sol
, FAL0
);
364 if (mq
== N
&& (c
!= '\n' || ! seenx
)) {
366 if (++lnlen
< QP_LINESIZE
- 1 -1)
368 /* Don't write a soft line break when we're in the last
369 * possible column and either an LF has been written or
370 * only an LF follows, as that'll end the line anyway */
371 /* XXX but - ensure is+1>=ie, then??
372 * xxx and/or - what about resetting lnlen; that
373 * xxx contradicts input==1 input line assertion, though */
374 if (c
== '\n' || is
== ie
|| *is
== '\n')
384 if (lnlen
> QP_LINESIZE
- 3 - 1 -1) {
391 qp
= _qp_ctohex(qp
, c
);
394 if (c
!= '\n' || ! seenx
)
402 /* Enforce soft line break if we haven't seen LF */
403 if (in
->l
> 0 && *--is
!= '\n') {
409 out
->l
= (size_t)(qp
- out
->s
);
410 out
->s
[out
->l
] = '\0';
415 qp_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
421 if (rest
!= NULL
&& rest
->l
!= 0) {
429 out
->s
= srealloc(out
->s
, out
->l
+ in
->l
+ 3);
434 /* Decoding encoded-word (RFC 2047) in a header field? */
443 c
= _qp_cfromhex(is
);
448 /* Illegal according to RFC 2045,
449 * section 6.7. Almost follow it */
452 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
453 *oc += 3; 0xFFFD TODO
457 *oc
++ = (c
== '_') ? ' ' : (char)c
;
459 goto jleave
; /* XXX QP decode, header: errors not reported */
462 /* Decoding a complete message/mimepart body line */
472 * Therefore, when decoding a Quoted-Printable body, any
473 * trailing white space on a line must be deleted, as it will
474 * necessarily have been added by intermediate transport
477 for (; is
< ie
&& blankchar(*is
); ++is
)
480 /* Soft line break? */
487 /* Not a soft line break? */
489 c
= _qp_cfromhex(is
);
494 /* Illegal according to RFC 2045, section 6.7.
495 * Rather follow it and include the = and the
499 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
500 *oc += 3; 0xFFFD TODO
506 /* CRLF line endings are encoded as QP, followed by a soft line
507 * break, so check for this special case, and simply forget we
508 * have seen one, so as not to end up with the entire DOS file
509 * in a contiguous buffer */
511 if (oc
> os
&& oc
[-1] == '\n') {
512 #if 0 /* TODO qp_decode() we do not normalize CRLF
513 * TODO to LF because for that we would need
514 * TODO to know if we are about to write to
515 * TODO the display or do save the file!
516 * TODO 'hope the MIME/send layer rewrite will
517 * TODO offer the possibility to DTRT */
518 if (oc
- 1 > os
&& oc
[-2] == '\r') {
525 out
->l
= (size_t)(oc
- os
);
526 rest
->s
= srealloc(rest
->s
, rest
->l
+ out
->l
);
527 memcpy(rest
->s
+ rest
->l
, out
->s
, out
->l
);
532 /* XXX RFC: QP decode should check no trailing WS on line */
534 out
->l
= (size_t)(oc
- os
);
540 b64_encode_calc_size(size_t len
)
543 len
+= (((len
/ B64_ENCODE_INPUT_PER_LINE
) + 1) * 3);
549 b64_encode(struct str
*out
, struct str
const *in
, enum b64flags flags
)
551 static char const b64table
[] =
552 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
553 uc_it
const *p
= (uc_it
const*)in
->s
;
554 ssize_t i
= b64_encode_calc_size(in
->l
), lnlen
;
557 if ((flags
& B64_BUF
) == 0)
558 out
->s
= (flags
& B64_SALLOC
) ? salloc(i
) : srealloc(out
->s
, i
);
561 if (! (flags
& (B64_CRLF
|B64_LF
)))
562 flags
&= ~B64_MULTILINE
;
564 for (lnlen
= 0, i
= (ssize_t
)in
->l
; i
> 0; p
+= 3, i
-= 3) {
565 ui_it a
= p
[0], b
, c
;
567 b64
[0] = b64table
[a
>> 2];
570 b64
[1] = b64table
[((a
& 0x3) << 4)];
576 b64
[1] = b64table
[((a
& 0x3) << 4) | ((b
& 0xf0) >> 4)];
577 b64
[2] = b64table
[((b
& 0xf) << 2)];
583 b64
[1] = b64table
[((a
& 0x3) << 4) | ((b
& 0xf0) >> 4)];
584 b64
[2] = b64table
[((b
& 0xf) << 2) | ((c
& 0xc0) >> 6)];
585 b64
[3] = b64table
[c
& 0x3f];
590 if (! (flags
& B64_MULTILINE
))
593 if (lnlen
< B64_LINESIZE
- 1)
597 if (flags
& B64_CRLF
)
599 if (flags
& (B64_CRLF
|B64_LF
))
603 if ((flags
& (B64_CRLF
|B64_LF
)) != 0 &&
604 ((flags
& B64_MULTILINE
) == 0 || lnlen
!= 0)) {
605 if (flags
& B64_CRLF
)
607 if (flags
& (B64_CRLF
|B64_LF
))
610 out
->l
= (size_t)(b64
- out
->s
);
611 out
->s
[out
->l
] = '\0';
616 b64_encode_cp(struct str
*out
, char const *cp
, enum b64flags flags
)
621 return b64_encode(out
, &in
, flags
);
625 b64_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
631 return b64_encode(out
, &in
, flags
);
635 b64_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
640 size_t len
= _b64_decode_prepare(&work
, in
);
642 /* Ignore an empty input, as may happen for an empty final line */
644 /* In B64_T cases there may be leftover decoded data for
645 * iconv(3) though; even if that is an incomplete
646 * multibyte character we have to copy it over */
647 /* XXX strictly speaking this should not be handled in here,
648 * XXX since it's leftover decoded data from an iconv(3);
649 * XXX like this we share the prototype with QP, though?? */
650 if (rest
!= NULL
&& rest
->l
> 0) {
660 if (work
.l
>= 4 && (work
.l
& 3) == 0) {
661 out
->s
= srealloc(out
->s
, len
);
664 if (ret
!= OKAY
|| (ssize_t
)(len
= _b64_decode(out
, &work
)) < 0)
670 char const *err
= tr(15, "[Invalid Base64 encoding ignored]\n");
672 x
= out
->s
= srealloc(out
->s
, len
+ 2);
673 if (rest
!= NULL
&& rest
->l
)
678 out
->l
= (size_t)(x
- out
->s
);