1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045:
3 *@ - Quoted-Printable, section 6.7
4 *@ - Base64, section 6.8
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2013 Steffen "Daode" Nurpmeso <sdaoden@users.sf.net>.
9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
12 * Copyright (c) 2006 The NetBSD Foundation, Inc.
13 * All rights reserved.
15 * This code is derived from software contributed to The NetBSD Foundation
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
40 #ifndef HAVE_AMALGAMATION
45 N
= 0, /* Do not quote */
46 Q
= 1, /* Must quote */
48 XF
= 3, /* Special character 'F' - maybe quoted */
49 XD
= 4, /* Special character '.' - maybe quoted */
50 US
= '_', /* In header, special character ' ' quoted as '_' */
51 QM
= '?', /* In header, special character ? not always quoted */
52 EQ
= Q
, /* '=' must be quoted */
53 TB
= SP
, /* Treat '\t' as a space */
54 NL
= N
, /* Don't quote '\n' (NL) */
55 CR
= Q
/* Always quote a '\r' (CR) */
58 /* Lookup tables to decide whether a character must be encoded or not.
59 * Email header differences according to RFC 2047, section 4.2:
60 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
61 * - don't care about the special ^F[rom] and ^.$ */
62 static ui8_t
const _qtab_body
[] = {
63 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,TB
,NL
, Q
, Q
,CR
, Q
, Q
,
64 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
65 SP
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,XD
, N
,
66 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
, N
,
68 N
, N
, N
, N
, N
, N
,XF
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
69 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
70 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
71 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
74 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
75 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
76 US
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
77 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
,QM
,
79 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
80 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
81 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
82 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
85 /* Check whether **s* must be quoted according to *ishead*, else body rules;
86 * *sol* indicates whether we are at the first character of a line/field */
87 SINLINE
enum _qact
_mustquote(char const *s
, char const *e
, bool_t sol
,
90 /* Convert c to/from a hexadecimal character string */
91 SINLINE
char * _qp_ctohex(char *store
, char c
);
92 SINLINE si32_t
_qp_cfromhex(char const *hex
);
94 /* Trim WS and make *work* point to the decodable range of *in*.
95 * Return the number of bytes a b64_decode operation on that buffer requires */
96 static size_t _b64_decode_prepare(struct str
*work
,
97 struct str
const *in
);
99 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base *in*put.
100 * Return number of useful bytes in *out* or -1 on error */
101 static ssize_t
_b64_decode(struct str
*out
, struct str
*in
);
104 _mustquote(char const *s
, char const *e
, bool_t sol
, bool_t ishead
)
106 ui8_t
const *qtab
= ishead
? _qtab_head
: _qtab_body
;
107 enum _qact a
= ((ui8_t
)*s
> 0x7F) ? Q
: qtab
[(ui8_t
)*s
], r
;
109 if ((r
= a
) == N
|| (r
= a
) == Q
)
113 /* Special header fields */
120 /* Treat '?' only special if part of '=?' and '?=' (still too much quoting
121 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
122 * should be hard to match */
123 if (a
== QM
&& ((! sol
&& s
[-1] == '=') || (s
< e
&& s
[1] == '=')))
131 /* WS only if trailing white space */
132 if (s
+ 1 == e
|| s
[1] == '\n')
137 /* Rest are special begin-of-line cases */
143 if (s
+ 4 < e
&& s
[1] == 'r' && s
[2] == 'o' && s
[3] == 'm')
148 if (a
== XD
&& (s
+ 1 == e
|| s
[1] == '\n'))
157 _qp_ctohex(char *store
, char c
)
159 static char const hexmap
[] = "0123456789ABCDEF";
162 store
[1] = hexmap
[(ui8_t
)c
& 0x0F];
163 c
= ((ui8_t
)c
>> 4) & 0x0F;
164 store
[0] = hexmap
[(ui8_t
)c
];
169 _qp_cfromhex(char const *hex
)
171 /* Be robust, allow lowercase hexadecimal letters, too */
172 static ui8_t
const atoi16
[] = {
173 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
174 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
175 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
176 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
178 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
179 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF /* 0x60-0x67 */
184 if ((i1
= (ui8_t
)hex
[0] - '0') >= NELEM(atoi16
) ||
185 (i2
= (ui8_t
)hex
[1] - '0') >= NELEM(atoi16
))
189 if ((i1
| i2
) & 0xF0)
202 _b64_decode_prepare(struct str
*work
, struct str
const *in
)
205 size_t cp_len
= in
->l
;
207 while (cp_len
> 0 && spacechar(*cp
))
211 for (cp
+= cp_len
; cp_len
> 0; --cp_len
) {
219 cp_len
= ((cp_len
* 3) >> 2) + (cp_len
>> 3);
224 _b64_decode(struct str
*out
, struct str
*in
)
226 static signed char const b64index
[] = {
227 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
228 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
229 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
230 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
231 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
232 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
233 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
234 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
236 #define EQU (ui32_t)-2
237 #define BAD (ui32_t)-1
238 #define uchar64(c) ((c) >= sizeof(b64index) ? BAD : (ui32_t)b64index[(c)])
241 ui8_t
*p
= (ui8_t
*)out
->s
;
242 ui8_t
const *q
= (ui8_t
const*)in
->s
, *end
;
246 for (end
= q
+ in
->l
; q
+ 4 <= end
; q
+= 4) {
247 ui32_t a
= uchar64(q
[0]), b
= uchar64(q
[1]), c
= uchar64(q
[2]),
250 if (a
>= EQU
|| b
>= EQU
|| c
== BAD
|| d
== BAD
)
253 *p
++ = ((a
<< 2) | ((b
& 0x30) >> 4));
254 if (c
== EQU
) { /* got '=' */
259 *p
++ = (((b
& 0x0f) << 4) | ((c
& 0x3c) >> 2));
260 if (d
== EQU
) /* got '=' */
262 *p
++ = (((c
& 0x03) << 6) | d
);
268 ret
= PTR2SIZE((char*)p
- out
->s
);
269 out
->l
= (size_t)ret
;
271 in
->l
-= PTR2SIZE((char*)UNCONST(q
) - in
->s
);
277 mime_cte_mustquote(char const *ln
, size_t lnlen
, bool_t ishead
)
282 for (ret
= 0, sol
= TRU1
; lnlen
> 0; sol
= FAL0
, ++ln
, --lnlen
)
283 ret
+= (_mustquote(ln
, ln
+ lnlen
, sol
, ishead
) != N
);
288 qp_encode_calc_size(size_t len
)
290 /* Worst case: 'CRLF' -> '=0D=0A=\n\0' */
291 len
= (len
* 3) + 1/* soft NL */ + 1/* visual NL */ + 1/* NUL */;
297 qp_encode_cp(struct str
*out
, char const *cp
, enum qpflags flags
)
302 return qp_encode(out
, &in
, flags
);
306 qp_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
312 return qp_encode(out
, &in
, flags
);
317 qp_encode(struct str
*out
, struct str
const *in
, enum qpflags flags
)
319 bool_t sol
= (flags
& QP_ISHEAD
? FAL0
: TRU1
), seenx
;
324 if ((flags
& QP_BUF
) == 0) {
325 lnlen
= qp_encode_calc_size(in
->l
);
326 out
->s
= (flags
& QP_SALLOC
) ? salloc(lnlen
) : srealloc(out
->s
, lnlen
);
334 for (seenx
= FAL0
, sol
= TRU1
; is
< ie
; sol
= FAL0
, ++qp
) {
335 enum _qact mq
= _mustquote(is
, ie
, sol
, TRU1
);
339 /* We convert into a single *encoded-word*, that'll end up in
340 * =?C?Q??=; quote '?' from when we're inside there on */
341 if (seenx
&& c
== '?')
350 qp
= _qp_ctohex(qp
, c
) + 1;
356 /* The body needs to take care for soft line breaks etc. */
357 for (lnlen
= 0, seenx
= FAL0
; is
< ie
; sol
= FAL0
) {
358 enum _qact mq
= _mustquote(is
, ie
, sol
, FAL0
);
361 if (mq
== N
&& (c
!= '\n' || !seenx
)) {
363 if (++lnlen
< QP_LINESIZE
- 1)
365 /* Don't write a soft line break when we're in the last possible
366 * column and either an LF has been written or only an LF follows, as
367 * that'll end the line anyway */
368 /* XXX but - ensure is+1>=ie, then??
369 * xxx and/or - what about resetting lnlen; that contradicts
370 * xxx the input==1 input line assertion, though */
371 if (c
== '\n' || is
== ie
|| *is
== '\n')
381 if (lnlen
> QP_LINESIZE
- 3 - 1) {
388 qp
= _qp_ctohex(qp
, c
);
391 if (c
!= '\n' || !seenx
)
399 /* Enforce soft line break if we haven't seen LF */
400 if (in
->l
> 0 && *--is
!= '\n') {
406 out
->l
= PTR2SIZE(qp
- out
->s
);
407 out
->s
[out
->l
] = '\0';
412 qp_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
418 if (rest
!= NULL
&& rest
->l
!= 0) {
426 out
->s
= srealloc(out
->s
, out
->l
+ in
->l
+ 3);
431 /* Decoding encoded-word (RFC 2047) in a header field? */
433 while (PTRCMP(is
, <, ie
)) {
436 if (PTRCMP(is
+ 1, >=, ie
)) {
440 c
= _qp_cfromhex(is
);
445 /* Illegal according to RFC 2045, section 6.7. Almost follow */
448 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
449 *oc += 3; 0xFFFD TODO
453 *oc
++ = (c
== '_') ? ' ' : (char)c
;
455 goto jleave
; /* XXX QP decode, header: errors not reported */
458 /* Decoding a complete message/mimepart body line */
459 while (PTRCMP(is
, <, ie
)) {
467 * Therefore, when decoding a Quoted-Printable body, any
468 * trailing white space on a line must be deleted, as it will
469 * necessarily have been added by intermediate transport
471 for (; PTRCMP(is
, <, ie
) && blankchar(*is
); ++is
)
473 if (PTRCMP(is
+ 1, >=, ie
)) {
474 /* Soft line break? */
481 /* Not a soft line break? */
483 c
= _qp_cfromhex(is
);
488 /* Illegal according to RFC 2045, section 6.7.
489 * Almost follow it and include the = and the follow char */
492 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
493 *oc += 3; 0xFFFD TODO
499 /* CRLF line endings are encoded as QP, followed by a soft line break, so
500 * check for this special case, and simply forget we have seen one, so as
501 * not to end up with the entire DOS file in a contiguous buffer */
503 if (PTRCMP(oc
, >, os
) && oc
[-1] == '\n') {
504 #if 0 /* TODO qp_decode() we do not normalize CRLF
505 * TODO to LF because for that we would need
506 * TODO to know if we are about to write to
507 * TODO the display or do save the file!
508 * TODO 'hope the MIME/send layer rewrite will
509 * TODO offer the possibility to DTRT */
510 if (oc
- 1 > os
&& oc
[-2] == '\r') {
517 out
->l
= PTR2SIZE(oc
- os
);
518 rest
->s
= srealloc(rest
->s
, rest
->l
+ out
->l
);
519 memcpy(rest
->s
+ rest
->l
, out
->s
, out
->l
);
524 /* XXX RFC: QP decode should check no trailing WS on line */
526 out
->l
= PTR2SIZE(oc
- os
);
532 b64_encode_calc_size(size_t len
)
535 len
+= (((len
/ B64_ENCODE_INPUT_PER_LINE
) + 1) * 3);
536 len
+= 2 + 1; /* CRLF, \0 */
541 b64_encode(struct str
*out
, struct str
const *in
, enum b64flags flags
)
543 static char const b64table
[] =
544 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
545 ui8_t
const *p
= (ui8_t
const*)in
->s
;
549 if (!(flags
& B64_BUF
)) {
550 i
= b64_encode_calc_size(in
->l
);
551 out
->s
= (flags
& B64_SALLOC
) ? salloc(i
) : srealloc(out
->s
, i
);
555 if (!(flags
& (B64_CRLF
| B64_LF
)))
556 flags
&= ~B64_MULTILINE
;
558 for (lnlen
= 0, i
= (ssize_t
)in
->l
; i
> 0; p
+= 3, i
-= 3) {
559 ui32_t a
= p
[0], b
, c
;
561 b64
[0] = b64table
[a
>> 2];
564 b64
[1] = b64table
[((a
& 0x3) << 4)];
570 b64
[1] = b64table
[((a
& 0x3) << 4) | ((b
& 0xf0) >> 4)];
571 b64
[2] = b64table
[((b
& 0xf) << 2)];
577 b64
[1] = b64table
[((a
& 0x3) << 4) | ((b
& 0xf0) >> 4)];
578 b64
[2] = b64table
[((b
& 0xf) << 2) | ((c
& 0xc0) >> 6)];
579 b64
[3] = b64table
[c
& 0x3f];
584 if (!(flags
& B64_MULTILINE
))
587 if (lnlen
< B64_LINESIZE
)
591 if (flags
& B64_CRLF
)
593 if (flags
& (B64_CRLF
| B64_LF
))
597 if ((flags
& (B64_CRLF
| B64_LF
)) &&
598 (!(flags
& B64_MULTILINE
) || lnlen
!= 0)) {
599 if (flags
& B64_CRLF
)
601 if (flags
& (B64_CRLF
| B64_LF
))
604 out
->l
= PTR2SIZE(b64
- out
->s
);
605 out
->s
[out
->l
] = '\0';
610 b64_encode_cp(struct str
*out
, char const *cp
, enum b64flags flags
)
615 return b64_encode(out
, &in
, flags
);
619 b64_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
625 return b64_encode(out
, &in
, flags
);
629 b64_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
634 size_t len
= _b64_decode_prepare(&work
, in
);
636 /* Ignore an empty input, as may happen for an empty final line */
638 /* In B64_T cases there may be leftover decoded data for
639 * iconv(3) though, even if that means it's an incomplete
640 * multibyte character we have to copy over */
641 /* XXX strictly speaking this should not be handled in here,
642 * XXX since it's leftover decoded data from an iconv(3);
643 * XXX like this we shared the prototype with QP, though?? */
644 if (rest
!= NULL
&& rest
->l
> 0) {
654 if (work
.l
>= 4 && (work
.l
& 3) == 0) {
655 out
->s
= srealloc(out
->s
, len
);
658 if (ret
!= OKAY
|| (ssize_t
)(len
= _b64_decode(out
, &work
)) < 0)
664 char const *err
= tr(15, "[Invalid Base64 encoding ignored]\n");
666 x
= out
->s
= srealloc(out
->s
, len
+ 2);
667 if (rest
!= NULL
&& rest
->l
)
672 out
->l
= PTR2SIZE(x
- out
->s
);
680 /* vim:set fenc=utf-8:s-it-mode */