1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047):
3 *@ - Quoted-Printable, section 6.7
4 *@ - Base64, section 6.8
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2014 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
12 * Copyright (c) 2006 The NetBSD Foundation, Inc.
13 * All rights reserved.
15 * This code is derived from software contributed to The NetBSD Foundation
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
40 #ifndef HAVE_AMALGAMATION
45 N
= 0, /* Do not quote */
46 Q
= 1, /* Must quote */
48 XF
= 3, /* Special character 'F' - maybe quoted */
49 XD
= 4, /* Special character '.' - maybe quoted */
50 UU
= 5, /* In header, _ must be quoted in encoded word */
51 US
= '_', /* In header, ' ' must be quoted as _ in encoded word */
52 QM
= '?', /* In header, special character ? not always quoted */
53 EQ
= '=', /* In header, '=' must be quoted in encoded word */
54 HT
='\t', /* In body HT=SP, in head HT=HT, but quote in encoded word */
55 NL
= N
, /* Don't quote '\n' (NL) */
56 CR
= Q
/* Always quote a '\r' (CR) */
59 /* Lookup tables to decide whether a character must be encoded or not.
60 * Email header differences according to RFC 2047, section 4.2:
61 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
62 * - don't care about the special ^F[rom] and ^.$ */
63 static ui8_t
const _qtab_body
[] = {
64 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,SP
,NL
, Q
, Q
,CR
, Q
, Q
,
65 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
66 SP
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,XD
, N
,
67 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
, N
, N
,
69 N
, N
, N
, N
, N
, N
,XF
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
70 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
71 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
72 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
75 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,HT
, Q
, Q
, Q
, Q
, Q
, Q
,
76 Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
, Q
,
77 US
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
78 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,EQ
, N
,QM
,
80 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
81 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,UU
,
82 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
83 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, Q
,
86 /* Check whether *s must be quoted according to flags, else body rules.
87 * s is the character to classify and e the end of the buffer it lives in;
 * sol indicates whether we are at the first character of a line/field.
 * The header table is consulted if flags contains MIMECTE_ISHEAD or
 * MIMECTE_ISENCWORD, the body table otherwise; any byte above 0x7F is
 * classified as Q (must quote) without a table lookup */
88 SINLINE
enum _qact
_mustquote(char const *s
, char const *e
, bool_t sol
,
89 enum mimecte_flags flags
);
91 /* Convert c to/from a hexadecimal character string.
 * _qp_ctohex() writes the two uppercase hexadecimal digits of c to store[0]
 * (high nibble) and store[1] (low nibble).
 * _qp_cfromhex() interprets hex[0] and hex[1] as one hexadecimal byte and
 * accepts lowercase digits, too.
 * NOTE(review): the return values cannot be read off these declarations;
 * callers use the result of _qp_ctohex() as a position within store, and
 * _qp_cfromhex() presumably signals invalid digits via a negative value -
 * confirm against the definitions */
92 SINLINE
char * _qp_ctohex(char *store
, char c
);
93 SINLINE si32_t
_qp_cfromhex(char const *hex
);
95 /* Trim WS and make *work* point to the decodable range of *in*.
96 * Return the amount of bytes a b64_decode operation on that buffer requires.
 * in is const and therefore left untouched; only work is updated */
97 static size_t _b64_decode_prepare(struct str
*work
,
98 struct str
const *in
);
100 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base in(put).
101 * Return number of useful bytes in out or -1 on error.
 * The decoder stores the result length in out->l and subtracts the amount
 * of consumed input from in->l, which is why in is not const.
 * NOTE(review): "sufficiently spaced" appears to mean that out->s already
 * offers enough room for the decoded data (b64_decode() sizes it using the
 * value returned by _b64_decode_prepare()) - confirm */
102 static ssize_t
_b64_decode(struct str
*out
, struct str
*in
);
105 _mustquote(char const *s
, char const *e
, bool_t sol
, enum mimecte_flags flags
)
111 qtab
= (flags
& (MIMECTE_ISHEAD
| MIMECTE_ISENCWORD
))
112 ? _qtab_head
: _qtab_body
;
113 a
= ((ui8_t
)*s
> 0x7F) ? Q
: qtab
[(ui8_t
)*s
];
115 if ((r
= a
) == N
|| (r
= a
) == Q
)
119 /* Special header fields */
120 if (flags
& (MIMECTE_ISHEAD
| MIMECTE_ISENCWORD
)) {
121 /* Special massage for encoded words */
122 if (flags
& MIMECTE_ISENCWORD
) {
136 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
137 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
138 * should be hard to match) */
139 if (a
== QM
&& ((!sol
&& s
[-1] == '=') || (s
< e
&& s
[1] == '=')))
147 /* WS only if trailing white space */
148 if (PTRCMP(s
+ 1, ==, e
) || s
[1] == '\n')
153 /* Rest are special begin-of-line cases */
159 if (PTRCMP(s
+ 4, <, e
) && s
[1] == 'r' && s
[2] == 'o' && s
[3] == 'm')
164 if (a
== XD
&& (PTRCMP(s
+ 1, ==, e
) || s
[1] == '\n'))
174 _qp_ctohex(char *store
, char c
)
176 static char const hexmap
[] = "0123456789ABCDEF";
180 store
[1] = hexmap
[(ui8_t
)c
& 0x0F];
181 c
= ((ui8_t
)c
>> 4) & 0x0F;
182 store
[0] = hexmap
[(ui8_t
)c
];
188 _qp_cfromhex(char const *hex
)
190 /* Be robust, allow lowercase hexadecimal letters, too */
191 static ui8_t
const atoi16
[] = {
192 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
193 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
194 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
195 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
196 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
197 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
198 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF /* 0x60-0x67 */
205 if ((i1
= (ui8_t
)hex
[0] - '0') >= NELEM(atoi16
) ||
206 (i2
= (ui8_t
)hex
[1] - '0') >= NELEM(atoi16
))
210 if ((i1
| i2
) & 0xF0u
)
224 _b64_decode_prepare(struct str
*work
, struct str
const *in
)
233 while (cp_len
> 0 && spacechar(*cp
))
237 for (cp
+= cp_len
; cp_len
> 0; --cp_len
) {
245 cp_len
= ((cp_len
* 3) >> 2) + (cp_len
>> 3);
251 _b64_decode(struct str
*out
, struct str
*in
)
253 static signed char const b64index
[] = {
254 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
255 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
256 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
257 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
258 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
259 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
260 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
261 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
263 #define EQU (ui32_t)-2
264 #define BAD (ui32_t)-1
265 #define uchar64(c) ((c) >= sizeof(b64index) ? BAD : (ui32_t)b64index[(c)])
269 ui8_t
const *q
, *end
;
273 q
= (ui8_t
const*)in
->s
;
276 for (end
= q
+ in
->l
; PTRCMP(q
+ 4, <=, end
); q
+= 4) {
277 ui32_t a
= uchar64(q
[0]), b
= uchar64(q
[1]), c
= uchar64(q
[2]),
280 if (a
>= EQU
|| b
>= EQU
|| c
== BAD
|| d
== BAD
)
283 *p
++ = ((a
<< 2) | ((b
& 0x30) >> 4));
284 if (c
== EQU
) { /* got '=' */
289 *p
++ = (((b
& 0x0F) << 4) | ((c
& 0x3C) >> 2));
290 if (d
== EQU
) /* got '=' */
292 *p
++ = (((c
& 0x03) << 6) | d
);
298 ret
= PTR2SIZE((char*)p
- out
->s
);
299 out
->l
= (size_t)ret
;
301 in
->l
-= PTR2SIZE((char*)UNCONST(q
) - in
->s
);
308 mime_char_to_hexseq(char store
[3], char c
)
313 rv
= _qp_ctohex(store
, c
);
319 mime_hexseq_to_char(char const *hex
)
324 rv
= _qp_cfromhex(hex
);
330 mime_cte_mustquote(char const *ln
, size_t lnlen
, enum mimecte_flags flags
)
336 for (rv
= 0, sol
= TRU1
; lnlen
> 0; sol
= FAL0
, ++ln
, --lnlen
)
337 switch (_mustquote(ln
, ln
+ lnlen
, sol
, flags
)) {
341 assert(flags
& MIMECTE_ISENCWORD
);
353 qp_encode_calc_size(size_t len
)
358 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
359 * However, we must be aware that (a) the output may span multiple lines
360 * and (b) the input does not end with a newline itself (nonetheless):
361 * LC_ALL=C awk 'BEGIN{
362 * for(i = 0; i < 100000; ++i) printf "\xC3\xBC"
364 * MAILRC=/dev/null LC_ALL=en_US.UTF-8 s-nail -nvvd \
365 * -Ssendcharsets=utf8 -s testsub ./LETTER */
367 lines
= bytes
/ QP_LINESIZE
;
371 /* Trailing hard NL may be missing, so there may be two lines.
372 * Thus add soft + hard NL per line and a trailing NUL */
373 lines
= (bytes
/ QP_LINESIZE
) + 1;
384 qp_encode_cp(struct str
*out
, char const *cp
, enum qpflags flags
)
391 out
= qp_encode(out
, &in
, flags
);
397 qp_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
405 out
= qp_encode(out
, &in
, flags
);
412 qp_encode(struct str
*out
, struct str
const *in
, enum qpflags flags
)
414 bool_t sol
= (flags
& QP_ISHEAD
? FAL0
: TRU1
), seenx
;
420 if (!(flags
& QP_BUF
)) {
421 lnlen
= qp_encode_calc_size(in
->l
);
422 out
->s
= (flags
& QP_SALLOC
) ? salloc(lnlen
) : srealloc(out
->s
, lnlen
);
430 enum mimecte_flags ctef
= MIMECTE_ISHEAD
|
431 (flags
& QP_ISENCWORD
? MIMECTE_ISENCWORD
: 0);
433 for (seenx
= FAL0
, sol
= TRU1
; is
< ie
; sol
= FAL0
, ++qp
) {
434 enum _qact mq
= _mustquote(is
, ie
, sol
, ctef
);
438 /* We convert into a single *encoded-word*, that'll end up in
439 * =?C?Q??=; quote '?' from when we're inside there on */
440 if (seenx
&& c
== '?')
449 qp
= _qp_ctohex(qp
, c
) + 1;
455 /* The body needs to take care for soft line breaks etc. */
456 for (lnlen
= 0, seenx
= FAL0
; is
< ie
; sol
= FAL0
) {
457 enum _qact mq
= _mustquote(is
, ie
, sol
, MIMECTE_NONE
);
460 if (mq
== N
&& (c
!= '\n' || !seenx
)) {
462 if (++lnlen
< QP_LINESIZE
- 1)
464 /* Don't write a soft line break when we're in the last possible
465 * column and either an LF has been written or only an LF follows, as
466 * that'll end the line anyway */
467 /* XXX but - ensure is+1>=ie, then??
468 * xxx and/or - what about resetting lnlen; that contradicts
469 * xxx input==1 input line assertion, though */
470 if (c
== '\n' || is
== ie
|| *is
== '\n')
480 if (lnlen
> QP_LINESIZE
- 3 - 1) {
487 qp
= _qp_ctohex(qp
, c
);
490 if (c
!= '\n' || !seenx
)
498 /* Enforce soft line break if we haven't seen LF */
499 if (in
->l
> 0 && *--is
!= '\n') {
505 out
->l
= PTR2SIZE(qp
- out
->s
);
506 out
->s
[out
->l
] = '\0';
512 qp_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
519 if (rest
!= NULL
&& rest
->l
!= 0) {
527 out
->s
= srealloc(out
->s
, out
->l
+ in
->l
+ 3);
532 /* Decoding encoded-word (RFC 2047) in a header field? */
537 if (PTRCMP(is
+ 1, >=, ie
)) {
541 c
= _qp_cfromhex(is
);
546 /* Invalid according to RFC 2045, section 6.7. Almost follow */
549 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
550 *oc += 3; 0xFFFD TODO
554 *oc
++ = (c
== '_' /* US */) ? ' ' : (char)c
;
556 goto jleave
; /* XXX QP decode, header: errors not reported */
559 /* Decoding a complete message/mimepart body line */
568 * Therefore, when decoding a Quoted-Printable body, any
569 * trailing white space on a line must be deleted, as it will
570 * necessarily have been added by intermediate transport
572 for (; is
< ie
&& blankchar(*is
); ++is
)
574 if (PTRCMP(is
+ 1, >=, ie
)) {
575 /* Soft line break? */
582 /* Not a soft line break? */
584 c
= _qp_cfromhex(is
);
589 /* Invalid according to RFC 2045, section 6.7.
590 * Almost follow it and include the = and the follow char */
593 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
594 *oc += 3; 0xFFFD TODO
600 /* CRLF line endings are encoded as QP, followed by a soft line break, so
601 * check for this special case, and simply forget we have seen one, so as
602 * not to end up with the entire DOS file in a contiguous buffer */
604 if (oc
> os
&& oc
[-1] == '\n') {
605 #if 0 /* TODO qp_decode() we do not normalize CRLF
606 * TODO to LF because for that we would need
607 * TODO to know if we are about to write to
608 * TODO the display or to save the file!
609 * TODO 'hope the MIME/send layer rewrite will
610 * TODO offer the possibility to DTRT */
611 if (oc
- 1 > os
&& oc
[-2] == '\r') {
618 out
->l
= PTR2SIZE(oc
- os
);
619 rest
->s
= srealloc(rest
->s
, rest
->l
+ out
->l
);
620 memcpy(rest
->s
+ rest
->l
, out
->s
, out
->l
);
625 /* XXX RFC: QP decode should check no trailing WS on line */
627 out
->l
= PTR2SIZE(oc
- os
);
634 b64_encode_calc_size(size_t len
)
638 len
+= (((len
/ B64_ENCODE_INPUT_PER_LINE
) + 1) * 3);
639 len
+= 2 + 1; /* CRLF, \0 */
645 b64_encode(struct str
*out
, struct str
const *in
, enum b64flags flags
)
647 static char const b64table
[] =
648 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
655 p
= (ui8_t
const*)in
->s
;
657 if (!(flags
& B64_BUF
)) {
658 i
= b64_encode_calc_size(in
->l
);
659 out
->s
= (flags
& B64_SALLOC
) ? salloc(i
) : srealloc(out
->s
, i
);
663 if (!(flags
& (B64_CRLF
| B64_LF
)))
664 flags
&= ~B64_MULTILINE
;
666 for (lnlen
= 0, i
= (ssize_t
)in
->l
; i
> 0; p
+= 3, i
-= 3) {
667 ui32_t a
= p
[0], b
, c
;
669 b64
[0] = b64table
[a
>> 2];
672 b64
[1] = b64table
[((a
& 0x3) << 4)];
678 b64
[1] = b64table
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
679 b64
[2] = b64table
[((b
& 0x0F) << 2)];
685 b64
[1] = b64table
[((a
& 0x03) << 4) | ((b
& 0xF0u
) >> 4)];
686 b64
[2] = b64table
[((b
& 0x0F) << 2) | ((c
& 0xC0u
) >> 6)];
687 b64
[3] = b64table
[c
& 0x3F];
692 if (!(flags
& B64_MULTILINE
))
695 if (lnlen
< B64_LINESIZE
)
699 if (flags
& B64_CRLF
)
701 if (flags
& (B64_CRLF
| B64_LF
))
705 if ((flags
& (B64_CRLF
| B64_LF
)) &&
706 (!(flags
& B64_MULTILINE
) || lnlen
!= 0)) {
707 if (flags
& B64_CRLF
)
709 if (flags
& (B64_CRLF
| B64_LF
))
712 out
->l
= PTR2SIZE(b64
- out
->s
);
713 out
->s
[out
->l
] = '\0';
719 b64_encode_buf(struct str
*out
, void const *vp
, size_t vp_len
,
727 out
= b64_encode(out
, &in
, flags
);
734 b64_encode_cp(struct str
*out
, char const *cp
, enum b64flags flags
)
741 out
= b64_encode(out
, &in
, flags
);
748 b64_decode(struct str
*out
, struct str
const *in
, struct str
*rest
)
756 len
= _b64_decode_prepare(&work
, in
);
758 /* Ignore an empty input, as may happen for an empty final line */
760 /* With B64_T there may be leftover decoded data for iconv(3), even if
761 * that means it's an incomplete multibyte character we have to copy over */
762 /* XXX strictly speaking this should not be handled in here,
763 * XXX since it's leftover decoded data from an iconv(3);
764 * XXX like this we share the prototype with QP, though?? */
765 if (rest
!= NULL
&& rest
->l
> 0) {
775 if (work
.l
>= 4 && !(work
.l
& 3)) {
776 out
->s
= srealloc(out
->s
, len
);
779 if (ret
!= OKAY
|| (ssize_t
)(len
= _b64_decode(out
, &work
)) < 0)
786 char const *err
= _("[Invalid Base64 encoding ignored]\n");
788 x
= out
->s
= srealloc(out
->s
, len
+ 1 +1);
789 if (rest
!= NULL
&& rest
->l
)
794 out
->l
= PTR2SIZE(x
- out
->s
);