1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045:
3 *@ - Quoted-Printable, section 6.7
4 *@ - Base64, section 6.8
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2014 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
12 * Copyright (c) 2006 The NetBSD Foundation, Inc.
13 * All rights reserved.
15 * This code is derived from software contributed to The NetBSD Foundation
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
40 #ifndef HAVE_AMALGAMATION
45 N
= 0, /* Do not quote */
46 Q
= 1, /* Must quote */
48 XF
= 3, /* Special character 'F' - maybe quoted */
49 XD
= 4, /* Special character '.' - maybe quoted */
50 US
= '_', /* In header, special character ' ' quoted as '_' */
51 QM
= '?', /* In header, special character ? not always quoted */
52 EQ
= Q
, /* '=' must be quoted */
53 TB
= SP
, /* Treat '\t' as a space */
54 NL
= N
, /* Don't quote '\n' (NL) */
55 CR
= Q
/* Always quote a '\r' (CR) */
58 /* Lookup tables to decide whether a character must be encoded or not.
59 * Email header differences according to RFC 2047, section 4.2:
60 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
61 * - don't care about the special ^F[rom] and ^.$ */
62 static ui8_t
const _qtab_body
[] = {
63 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,TB
,NL
, Q
, Q
,CR
, Q
, Q
,
64 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
65 SP
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,XD
, N
,
66 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
, N
,
68 N
, N
, N
, N
, N
, N
,XF
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
69 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
70 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
71 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
74 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
75 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
76 US
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
77 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
,QM
,
79 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
80 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
81 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
82 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
85 /* Check whether **s* must be quoted according to *ishead*, else body rules;
86 * *sol* indicates whether we are at the first character of a line/field */
87 SINLINE
enum _qact
_mustquote(char const *s
, char const *e
, bool_t sol
,
90 /* Convert c to/from a hexadecimal character string */
91 SINLINE
char * _qp_ctohex(char *store
, char c
);
92 SINLINE si32_t
_qp_cfromhex(char const *hex
);
94 /* Trim WS and make *work* point to the decodable range of *in*.
95 * Return the amount of bytes a b64_decode operation on that buffer requires */
96 static size_t _b64_decode_prepare(struct str
*work
,
97 struct str
const *in
);
99 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base *in*put.
100 * Return number of useful bytes in *out* or -1 on error */
101 static ssize_t
_b64_decode(struct str
*out
, struct str
*in
);
104 _mustquote(char const *s
, char const *e
, bool_t sol
, bool_t ishead
)
110 qtab
= ishead
? _qtab_head
: _qtab_body
;
111 a
= ((ui8_t
)*s
> 0x7F) ? Q
: qtab
[(ui8_t
)*s
];
113 if ((r
= a
) == N
|| (r
= a
) == Q
)
117 /* Special header fields */
124 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
125 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
126 * should be hard to match) */
127 if (a
== QM
&& ((!sol
&& s
[-1] == '=') || (s
< e
&& s
[1] == '=')))
135 /* WS only if trailing white space */
136 if (PTRCMP(s
+ 1, ==, e
) || s
[1] == '\n')
141 /* Rest are special begin-of-line cases */
147 if (PTRCMP(s
+ 4, <, e
) && s
[1] == 'r' && s
[2] == 'o' && s
[3] == 'm')
152 if (a
== XD
&& (PTRCMP(s
+ 1, ==, e
) || s
[1] == '\n'))
162 _qp_ctohex(char *store
, char c
)
164 static char const hexmap
[] = "0123456789ABCDEF";
168 store
[1] = hexmap
[(ui8_t
)c
& 0x0F];
169 c
= ((ui8_t
)c
>> 4) & 0x0F;
170 store
[0] = hexmap
[(ui8_t
)c
];
176 _qp_cfromhex(char const *hex
)
178 /* Be robust, allow lowercase hexadecimal letters, too */
179 static ui8_t
const atoi16
[] = {
180 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
181 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
182 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
183 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
184 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
185 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
186 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF /* 0x60-0x67 */
193 if ((i1
= (ui8_t
)hex
[0] - '0') >= NELEM(atoi16
) ||
194 (i2
= (ui8_t
)hex
[1] - '0') >= NELEM(atoi16
))
198 if ((i1
| i2
) & 0xF0u
)
212 _b64_decode_prepare(struct str
*work
, struct str
const *in
)
221 while (cp_len
> 0 && spacechar(*cp
))
225 for (cp
+= cp_len
; cp_len
> 0; --cp_len
) {
233 cp_len
= ((cp_len
* 3) >> 2) + (cp_len
>> 3);
239 _b64_decode(struct str
*out
, struct str
*in
)
241 static signed char const b64index
[] = {
242 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
243 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
244 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
245 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
246 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
247 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
248 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
249 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
251 #define EQU (ui32_t)-2
252 #define BAD (ui32_t)-1
253 #define uchar64(c) ((c) >= sizeof(b64index) ? BAD : (ui32_t)b64index[(c)])
257 ui8_t
const *q
, *end
;
261 q
= (ui8_t
const*)in
->s
;
264 for (end
= q
+ in
->l
; PTRCMP(q
+ 4, <=, end
); q
+= 4) {
265 ui32_t a
= uchar64(q
[0]), b
= uchar64(q
[1]), c
= uchar64(q
[2]),
268 if (a
>= EQU
|| b
>= EQU
|| c
== BAD
|| d
== BAD
)
271 *p
++ = ((a
<< 2) | ((b
& 0x30) >> 4));
272 if (c
== EQU
) { /* got '=' */
277 *p
++ = (((b
& 0x0F) << 4) | ((c
& 0x3C) >> 2));
278 if (d
== EQU
) /* got '=' */
280 *p
++ = (((c
& 0x03) << 6) | d
);
286 ret
= PTR2SIZE((char*)p
- out
->s
);
287 out
->l
= (size_t)ret
;
289 in
->l
-= PTR2SIZE((char*)UNCONST(q
) - in
->s
);
296 mime_cte_mustquote(char const *ln
, size_t lnlen
, bool_t ishead
)
302 for (ret
= 0, sol
= TRU1
; lnlen
> 0; sol
= FAL0
, ++ln
, --lnlen
)
303 ret
+= (_mustquote(ln
, ln
+ lnlen
, sol
, ishead
) != N
);
309 qp_encode_calc_size(size_t len
)
312 /* Worst case: 'CRLF' -> '=0D=0A=\n\0' */
313 len
= (len
* 3) + 1/* soft NL */ + 1/* visual NL */ + 1/* NUL */;
320 qp_encode_cp(struct str
*out
, char const *cp
, enum qpflags flags
)
327 out
= qp_encode(out
, &in
, flags
);
333 qp_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
341 out
= qp_encode(out
, &in
, flags
);
348 qp_encode(struct str
*out
, struct str
const *in
, enum qpflags flags
)
350 bool_t sol
= (flags
& QP_ISHEAD
? FAL0
: TRU1
), seenx
;
356 if (!(flags
& QP_BUF
)) {
357 lnlen
= qp_encode_calc_size(in
->l
);
358 out
->s
= (flags
& QP_SALLOC
) ? salloc(lnlen
) : srealloc(out
->s
, lnlen
);
366 for (seenx
= FAL0
, sol
= TRU1
; is
< ie
; sol
= FAL0
, ++qp
) {
367 enum _qact mq
= _mustquote(is
, ie
, sol
, TRU1
);
371 /* We convert into a single *encoded-word*, that'll end up in
372 * =?C?Q??=; quote '?' from when we're inside there on */
373 if (seenx
&& c
== '?')
382 qp
= _qp_ctohex(qp
, c
) + 1;
388 /* The body needs to take care for soft line breaks etc. */
389 for (lnlen
= 0, seenx
= FAL0
; is
< ie
; sol
= FAL0
) {
390 enum _qact mq
= _mustquote(is
, ie
, sol
, FAL0
);
393 if (mq
== N
&& (c
!= '\n' || !seenx
)) {
395 if (++lnlen
< QP_LINESIZE
- 1)
397 /* Don't write a soft line break when we're in the last possible
398 * column and either an LF has been written or only an LF follows, as
399 * that'll end the line anyway */
400 /* XXX but - ensure is+1>=ie, then??
401 * xxx and/or - what about resetting lnlen; that
402 * xxx contradicts input==1 input line assertion, though */
403 if (c
== '\n' || is
== ie
|| *is
== '\n')
413 if (lnlen
> QP_LINESIZE
- 3 - 1) {
420 qp
= _qp_ctohex(qp
, c
);
423 if (c
!= '\n' || !seenx
)
431 /* Enforce soft line break if we haven't seen LF */
432 if (in
->l
> 0 && *--is
!= '\n') {
438 out
->l
= PTR2SIZE(qp
- out
->s
);
439 out
->s
[out
->l
] = '\0';
445 qp_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
452 if (rest
!= NULL
&& rest
->l
!= 0) {
460 out
->s
= srealloc(out
->s
, out
->l
+ in
->l
+ 3);
465 /* Decoding encoded-word (RFC 2047) in a header field? */
470 if (PTRCMP(is
+ 1, >=, ie
)) {
474 c
= _qp_cfromhex(is
);
479 /* Illegal according to RFC 2045, section 6.7. Almost follow */
482 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
483 *oc += 3; 0xFFFD TODO
487 *oc
++ = (c
== '_') ? ' ' : (char)c
;
489 goto jleave
; /* XXX QP decode, header: errors not reported */
492 /* Decoding a complete message/mimepart body line */
501 * Therefore, when decoding a Quoted-Printable body, any
502 * trailing white space on a line must be deleted, as it will
503 * necessarily have been added by intermediate transport
505 for (; is
< ie
&& blankchar(*is
); ++is
)
507 if (PTRCMP(is
+ 1, >=, ie
)) {
508 /* Soft line break? */
515 /* Not a soft line break? */
517 c
= _qp_cfromhex(is
);
522 /* Illegal according to RFC 2045, section 6.7.
523 * Almost follow it and include the = and the follow char */
526 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
527 *oc += 3; 0xFFFD TODO
533 /* CRLF line endings are encoded as QP, followed by a soft line break, so
534 * check for this special case, and simply forget we have seen one, so as
535 * not to end up with the entire DOS file in a contiguous buffer */
537 if (oc
> os
&& oc
[-1] == '\n') {
538 #if 0 /* TODO qp_decode() we do not normalize CRLF
539 * TODO to LF because for that we would need
540 * TODO to know if we are about to write to
541 * TODO the display or do save the file!
542 * TODO 'hope the MIME/send layer rewrite will
543 * TODO offer the possibility to DTRT */
544 if (oc
- 1 > os
&& oc
[-2] == '\r') {
551 out
->l
= PTR2SIZE(oc
- os
);
552 rest
->s
= srealloc(rest
->s
, rest
->l
+ out
->l
);
553 memcpy(rest
->s
+ rest
->l
, out
->s
, out
->l
);
558 /* XXX RFC: QP decode should check no trailing WS on line */
560 out
->l
= PTR2SIZE(oc
- os
);
567 b64_encode_calc_size(size_t len
)
571 len
+= (((len
/ B64_ENCODE_INPUT_PER_LINE
) + 1) * 3);
572 len
+= 2 + 1; /* CRLF, \0 */
578 b64_encode(struct str
*out
, struct str
const *in
, enum b64flags flags
)
580 static char const b64table
[] =
581 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
588 p
= (ui8_t
const*)in
->s
;
590 if (!(flags
& B64_BUF
)) {
591 i
= b64_encode_calc_size(in
->l
);
592 out
->s
= (flags
& B64_SALLOC
) ? salloc(i
) : srealloc(out
->s
, i
);
596 if (!(flags
& (B64_CRLF
| B64_LF
)))
597 flags
&= ~B64_MULTILINE
;
599 for (lnlen
= 0, i
= (ssize_t
)in
->l
; i
> 0; p
+= 3, i
-= 3) {
600 ui32_t a
= p
[0], b
, c
;
602 b64
[0] = b64table
[a
>> 2];
605 b64
[1] = b64table
[((a
& 0x3) << 4)];
611 b64
[1] = b64table
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
612 b64
[2] = b64table
[((b
& 0x0F) << 2)];
618 b64
[1] = b64table
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
619 b64
[2] = b64table
[((b
& 0x0F) << 2) | ((c
& 0xC0u
) >> 6)];
620 b64
[3] = b64table
[c
& 0x3F];
625 if (!(flags
& B64_MULTILINE
))
628 if (lnlen
< B64_LINESIZE
)
632 if (flags
& B64_CRLF
)
634 if (flags
& (B64_CRLF
| B64_LF
))
638 if ((flags
& (B64_CRLF
| B64_LF
)) &&
639 (!(flags
& B64_MULTILINE
) || lnlen
!= 0)) {
640 if (flags
& B64_CRLF
)
642 if (flags
& (B64_CRLF
| B64_LF
))
645 out
->l
= PTR2SIZE(b64
- out
->s
);
646 out
->s
[out
->l
] = '\0';
652 b64_encode_cp(struct str
*out
, char const *cp
, enum b64flags flags
)
659 out
= b64_encode(out
, &in
, flags
);
665 b64_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
673 out
= b64_encode(out
, &in
, flags
);
679 b64_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
687 len
= _b64_decode_prepare(&work
, in
);
689 /* Ignore an empty input, as may happen for an empty final line */
691 /* With B64_T there may be leftover decoded data for iconv(3), even if
692 * that means it's an incomplete multibyte character we have to copy over */
693 /* XXX strictly speaking this should not be handled in here,
694 * XXX since it's leftover decoded data from iconv(3);
695 * XXX like this we shared the prototype with QP, though?? */
696 if (rest
!= NULL
&& rest
->l
> 0) {
706 if (work
.l
>= 4 && !(work
.l
& 3)) {
707 out
->s
= srealloc(out
->s
, len
);
710 if (ret
!= OKAY
|| (ssize_t
)(len
= _b64_decode(out
, &work
)) < 0)
717 char const *err
= tr(15, "[Invalid Base64 encoding ignored]\n");
719 x
= out
->s
= srealloc(out
->s
, len
+ 1 +1);
720 if (rest
!= NULL
&& rest
->l
)
725 out
->l
= PTR2SIZE(x
- out
->s
);
733 /* vim:set fenc=utf-8:s-it-mode */