IMAP: add support for international mailbox names..
[s-mailx.git] / mime_enc.c
blobd6936ccf7716cc1706dad1931a31a4707a955f3b
/*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047):
 *@ - Quoted-Printable, section 6.7
 *@ - Base64, section 6.8
 *
 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
 */
/* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
/* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
/*-
 * Copyright (c) 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Anon Ymous.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
39 #undef n_FILE
40 #define n_FILE mime_enc
42 #ifndef HAVE_AMALGAMATION
43 # include "nail.h"
44 #endif
/* Quoting decisions: the values stored in the _qtab_* lookup tables and
 * returned by _mustquote().  Small integers are plain classes; the character
 * valued members double as the character they stand for */
enum _qact {
   N = 0,      /* Do not quote */
   Q = 1,      /* Must quote */
   SP = 2,     /* sp */
   XF = 3,     /* Special character 'F' - maybe quoted */
   XD = 4,     /* Special character '.' - maybe quoted */
   UU = 5,     /* In header, _ must be quoted in encoded word */
   US = '_',   /* In header, ' ' must be quoted as _ in encoded word */
   QM = '?',   /* In header, special character ? not always quoted */
   EQ = '=',   /* In header, '=' must be quoted in encoded word */
   HT = '\t',  /* In body HT=SP, in head HT=HT, but quote in encoded word */
   NL = N,     /* Don't quote '\n' (NL) */
   CR = Q      /* Always quote a '\r' (CR) */
};
61 /* Lookup tables to decide wether a character must be encoded or not.
62 * Email header differences according to RFC 2047, section 4.2:
63 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
64 * - don't care about the special ^F[rom] and ^.$ */
65 static ui8_t const _qtab_body[] = {
66 Q, Q, Q, Q, Q, Q, Q, Q, Q,SP,NL, Q, Q,CR, Q, Q,
67 Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q,
68 SP, N, N, N, N, N, N, N, N, N, N, N, N, N,XD, N,
69 N, N, N, N, N, N, N, N, N, N, N, N, N, Q, N, N,
71 N, N, N, N, N, N,XF, N, N, N, N, N, N, N, N, N,
72 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
73 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
74 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q,
76 _qtab_head[] = {
77 Q, Q, Q, Q, Q, Q, Q, Q, Q,HT, Q, Q, Q, Q, Q, Q,
78 Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q,
79 US, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
80 N, N, N, N, N, N, N, N, N, N, N, N, N,EQ, N,QM,
82 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
83 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,UU,
84 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
85 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q,
88 /* For decoding be robust and allow lowercase letters, too */
89 static char const _qp_itoa16[] = "0123456789ABCDEF";
90 static ui8_t const _qp_atoi16[] = {
91 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
92 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
93 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
94 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
95 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
96 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
97 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF /* 0x60-0x67 */
/* Base64 alphabet (RFC 2045, section 6.8) and its inverse.
 * The decoding table covers the 7-bit range only and stores -1 for characters
 * outside of the alphabet and -2 for the padding character '='.
 * The decoding table is only accessed via _B64_DECUI8() */
static char const _b64_enctbl[] =
   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

static signed char const _b64__dectbl[] = {
   /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
   /* 0x30 */ 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
   /* 0x40 */ -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
   /* 0x50 */ 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
   /* 0x60 */ -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
   /* 0x70 */ 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* Table values widened to ui32_t: data is 0..63, and the two markers become
 * the two largest ui32_t values so that "x >= _B64_EQU" means "no data".
 * The expansions are parenthesized so they are safe in any expression.
 * Note _B64_DECUI8() evaluates its argument twice */
#define _B64_EQU        ((ui32_t)-2)
#define _B64_BAD        ((ui32_t)-1)
#define _B64_DECUI8(C)  \
   ((C) >= sizeof(_b64__dectbl) ? _B64_BAD : (ui32_t)_b64__dectbl[(ui8_t)(C)])
118 /* ASCII case-insensitive check wether Content-Transfer-Encoding: header body
119 * hbody defined this encoding type */
120 static bool_t _is_ct_enc(char const *hbody, char const *encoding);
122 /* Check wether *s must be quoted according to flags, else body rules;
123 * sol indicates wether we are at the first character of a line/field */
124 SINLINE enum _qact _mustquote(char const *s, char const *e, bool_t sol,
125 enum mime_enc_flags flags);
127 /* Convert c to/from a hexadecimal character string */
128 SINLINE char * _qp_ctohex(char *store, char c);
129 SINLINE si32_t _qp_cfromhex(char const *hex);
131 /* Trim WS and make work point to the decodable range of in*
132 * Return the amount of bytes a b64_decode operation on that buffer requires */
133 static size_t _b64_decode_prepare(struct str *work,
134 struct str const *in);
136 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base in(put).
137 * Return number of useful bytes in out or -1 on error */
138 static ssize_t _b64_decode(struct str *out, struct str *in);
140 static bool_t
141 _is_ct_enc(char const *hbody, char const *encoding)
143 bool_t quoted, rv;
144 int c;
145 NYD2_ENTER;
147 if (*hbody == '"')
148 quoted = TRU1, ++hbody;
149 else
150 quoted = FAL0;
151 rv = FAL0;
153 while (*hbody != '\0' && *encoding != '\0')
154 if ((c = *hbody++, lowerconv(c) != *encoding++))
155 goto jleave;
156 rv = TRU1;
158 if (quoted && *hbody == '"')
159 goto jleave;
160 if (*hbody == '\0' || whitechar(*hbody))
161 goto jleave;
162 rv = FAL0;
163 jleave:
164 NYD2_LEAVE;
165 return rv;
168 SINLINE enum _qact
169 _mustquote(char const *s, char const *e, bool_t sol, enum mime_enc_flags flags)
171 ui8_t const *qtab;
172 enum _qact a, r;
173 NYD2_ENTER;
175 qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
176 ? _qtab_head : _qtab_body;
177 a = ((ui8_t)*s > 0x7F) ? Q : qtab[(ui8_t)*s];
179 if ((r = a) == N || (r = a) == Q)
180 goto jleave;
181 r = Q;
183 /* Special header fields */
184 if (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)) {
185 /* Special massage for encoded words */
186 if (flags & MIMEEF_ISENCWORD) {
187 switch (a) {
188 case HT:
189 case US:
190 case EQ:
191 r = a;
192 /* FALLTHRU */
193 case UU:
194 goto jleave;
195 default:
196 break;
200 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
201 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
202 * should be hard too match */
203 if (a == QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
204 goto jleave;
205 goto jnquote;
208 /* Body-only */
210 if (a == SP) {
211 /* WS only if trailing white space */
212 if (PTRCMP(s + 1, ==, e) || s[1] == '\n')
213 goto jleave;
214 goto jnquote;
217 /* Rest are special begin-of-line cases */
218 if (!sol)
219 goto jnquote;
221 /* ^From */
222 if (a == XF) {
223 if (PTRCMP(s + 4, <, e) && s[1] == 'r' && s[2] == 'o' && s[3] == 'm')
224 goto jleave;
225 goto jnquote;
227 /* ^.$ */
228 if (a == XD && (PTRCMP(s + 1, ==, e) || s[1] == '\n'))
229 goto jleave;
230 jnquote:
231 r = N;
232 jleave:
233 NYD2_LEAVE;
234 return r;
237 SINLINE char *
238 _qp_ctohex(char *store, char c)
240 NYD2_ENTER;
241 store[2] = '\0';
242 store[1] = _qp_itoa16[(ui8_t)c & 0x0F];
243 c = ((ui8_t)c >> 4) & 0x0F;
244 store[0] = _qp_itoa16[(ui8_t)c];
245 NYD2_LEAVE;
246 return store;
249 SINLINE si32_t
250 _qp_cfromhex(char const *hex)
252 ui8_t i1, i2;
253 si32_t rv;
254 NYD2_ENTER;
256 if ((i1 = (ui8_t)hex[0] - '0') >= NELEM(_qp_atoi16) ||
257 (i2 = (ui8_t)hex[1] - '0') >= NELEM(_qp_atoi16))
258 goto jerr;
259 i1 = _qp_atoi16[i1];
260 i2 = _qp_atoi16[i2];
261 if ((i1 | i2) & 0xF0u)
262 goto jerr;
263 rv = i1;
264 rv <<= 4;
265 rv += i2;
266 jleave:
267 NYD2_LEAVE;
268 return rv;
269 jerr:
270 rv = -1;
271 goto jleave;
274 static size_t
275 _b64_decode_prepare(struct str *work, struct str const *in)
277 char *cp;
278 size_t cp_len;
279 NYD2_ENTER;
281 cp = in->s;
282 cp_len = in->l;
284 while (cp_len > 0 && spacechar(*cp))
285 ++cp, --cp_len;
286 work->s = cp;
288 for (cp += cp_len; cp_len > 0; --cp_len) {
289 char c = *--cp;
290 if (!spacechar(c))
291 break;
293 work->l = cp_len;
295 if (cp_len > 16)
296 cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
297 cp_len += 2*3 +1;
298 NYD2_LEAVE;
299 return cp_len;
302 static ssize_t
303 _b64_decode(struct str *out, struct str *in)
305 ssize_t rv = -1;
306 ui8_t *p;
307 ui8_t const *q, *end;
308 NYD2_ENTER;
310 p = (ui8_t*)out->s + out->l;
311 q = (ui8_t const*)in->s;
313 for (end = q + in->l; PTRCMP(q + 4, <=, end);) {
314 ui32_t a = _B64_DECUI8(q[0]), b = _B64_DECUI8(q[1]),
315 c = _B64_DECUI8(q[2]), d = _B64_DECUI8(q[3]);
316 q += 4;
318 if (a >= _B64_EQU || b >= _B64_EQU || c == _B64_BAD || d == _B64_BAD)
319 goto jleave;
321 *p++ = ((a << 2) | ((b & 0x30) >> 4));
322 if (c == _B64_EQU) { /* got '=' */
323 if (d != _B64_EQU)
324 goto jleave;
325 break;
327 *p++ = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
328 if (d == _B64_EQU) /* got '=' */
329 break;
330 *p++ = (((c & 0x03) << 6) | d);
332 rv ^= rv;
334 jleave: {
335 size_t i = PTR2SIZE((char*)p - out->s);
336 out->l = i;
337 if (rv == 0)
338 rv = (ssize_t)i;
340 in->l -= PTR2SIZE((char*)UNCONST(q) - in->s);
341 in->s = UNCONST(q);
342 NYD2_LEAVE;
343 return rv;
346 FL char *
347 mime_char_to_hexseq(char store[3], char c)
349 char *rv;
350 NYD2_ENTER;
352 rv = _qp_ctohex(store, c);
353 NYD2_LEAVE;
354 return rv;
357 FL si32_t
358 mime_hexseq_to_char(char const *hex)
360 si32_t rv;
361 NYD2_ENTER;
363 rv = _qp_cfromhex(hex);
364 NYD2_LEAVE;
365 return rv;
368 FL enum mime_enc
369 mime_enc_target(void)
371 char const *cp;
372 enum mime_enc rv;
373 NYD2_ENTER;
375 if ((cp = ok_vlook(encoding)) == NULL)
376 rv = MIME_DEFAULT_ENCODING;
377 else if (!asccasecmp(cp, "quoted-printable"))
378 rv = MIMEE_QP;
379 else if (!asccasecmp(cp, "8bit"))
380 rv = MIMEE_8B;
381 else if (!asccasecmp(cp, "base64"))
382 rv = MIMEE_B64;
383 else {
384 n_err(_("Warning: invalid *encoding*, using Base64: \"%s\"\n"), cp);
385 rv = MIMEE_B64;
387 NYD2_LEAVE;
388 return rv;
391 FL enum mime_enc
392 mime_enc_from_ctehead(char const *hbody)
394 enum mime_enc rv;
395 NYD2_ENTER;
397 if (hbody == NULL || _is_ct_enc(hbody, "7bit"))
398 rv = MIMEE_7B;
399 else if (_is_ct_enc(hbody, "8bit"))
400 rv = MIMEE_8B;
401 else if (_is_ct_enc(hbody, "base64"))
402 rv = MIMEE_B64;
403 else if (_is_ct_enc(hbody, "binary"))
404 rv = MIMEE_BIN;
405 else if (_is_ct_enc(hbody, "quoted-printable"))
406 rv = MIMEE_QP;
407 else
408 rv = MIMEE_NONE;
409 NYD2_LEAVE;
410 return rv;
413 FL char const *
414 mime_enc_from_conversion(enum conversion const convert) /* TODO booom */
416 char const *rv;
417 NYD_ENTER;
419 switch (convert) {
420 case CONV_7BIT: rv = "7bit"; break;
421 case CONV_8BIT: rv = "8bit"; break;
422 case CONV_TOQP: rv = "quoted-printable"; break;
423 case CONV_TOB64: rv = "base64"; break;
424 default: rv = ""; break;
426 NYD_LEAVE;
427 return rv;
430 FL size_t
431 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags)
433 size_t rv;
434 bool_t sol;
435 NYD_ENTER;
437 for (rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
438 switch (_mustquote(ln, ln + lnlen, sol, flags)) {
439 case US:
440 case EQ:
441 case HT:
442 assert(flags & MIMEEF_ISENCWORD);
443 /* FALLTHRU */
444 case N:
445 continue;
446 default:
447 ++rv;
449 NYD_LEAVE;
450 return rv;
453 FL size_t
454 qp_encode_calc_size(size_t len)
456 size_t bytes, lines;
457 NYD_ENTER;
459 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
460 * However, we must be aware that (a) the output may span multiple lines
461 * and (b) the input does not end with a newline itself (nonetheless):
462 * LC_ALL=C awk 'BEGIN{
463 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
464 * }' |
465 * MAILRC=/dev/null LC_ALL=en_US.UTF-8 s-nail -nvvd \
466 * -Ssendcharsets=utf8 -s testsub ./LETTER */
467 bytes = len * 3;
468 lines = bytes / QP_LINESIZE;
469 len += lines;
471 bytes = len * 3;
472 /* Trailing hard NL may be missing, so there may be two lines.
473 * Thus add soft + hard NL per line and a trailing NUL */
474 lines = (bytes / QP_LINESIZE) + 1;
475 lines <<= 1;
476 bytes += lines;
477 len = ++bytes;
479 NYD_LEAVE;
480 return len;
#ifdef notyet
/* qp_encode() for the NUL terminated string cp */
FL struct str *
qp_encode_cp(struct str *out, char const *cp, enum qpflags flags)
{
   struct str src;
   NYD_ENTER;

   src.s = UNCONST(cp);
   src.l = strlen(cp);
   out = qp_encode(out, &src, flags);

   NYD_LEAVE;
   return out;
}

/* qp_encode() for the vp_len bytes of the buffer vp */
FL struct str *
qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
   enum qpflags flags)
{
   struct str src;
   NYD_ENTER;

   src.s = UNCONST(vp);
   src.l = vp_len;
   out = qp_encode(out, &src, flags);

   NYD_LEAVE;
   return out;
}
#endif /* notyet */
512 FL struct str *
513 qp_encode(struct str *out, struct str const *in, enum qpflags flags)
515 bool_t sol = (flags & QP_ISHEAD ? FAL0 : TRU1), seenx;
516 ssize_t lnlen;
517 char *qp;
518 char const *is, *ie;
519 NYD_ENTER;
521 if (!(flags & QP_BUF)) {
522 lnlen = qp_encode_calc_size(in->l);
523 out->s = (flags & QP_SALLOC) ? salloc(lnlen) : srealloc(out->s, lnlen);
525 qp = out->s;
526 is = in->s;
527 ie = is + in->l;
529 /* QP_ISHEAD? */
530 if (!sol) {
531 enum mime_enc_flags ef = MIMEEF_ISHEAD |
532 (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
534 for (seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp) {
535 enum _qact mq = _mustquote(is, ie, sol, ef);
536 char c = *is++;
538 if (mq == N) {
539 /* We convert into a single *encoded-word*, that'll end up in
540 * =?C?Q??=; quote '?' from when we're inside there on */
541 if (seenx && c == '?')
542 goto jheadq;
543 *qp = c;
544 } else if (mq == US)
545 *qp = US;
546 else {
547 seenx = TRU1;
548 jheadq:
549 *qp++ = '=';
550 qp = _qp_ctohex(qp, c) + 1;
553 goto jleave;
556 /* The body needs to take care for soft line breaks etc. */
557 for (lnlen = 0, seenx = FAL0; is < ie; sol = FAL0) {
558 enum _qact mq = _mustquote(is, ie, sol, MIMEEF_NONE);
559 char c = *is++;
561 if (mq == N && (c != '\n' || !seenx)) {
562 *qp++ = c;
563 if (++lnlen < QP_LINESIZE - 1)
564 continue;
565 /* Don't write a soft line break when we're in the last possible
566 * column and either an LF has been written or only an LF follows, as
567 * that'll end the line anyway */
568 /* XXX but - ensure is+1>=ie, then??
569 * xxx and/or - what about resetting lnlen; that contra
570 * xxx dicts input==1 input line assertion, though */
571 if (c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
572 continue;
573 jsoftnl:
574 qp[0] = '=';
575 qp[1] = '\n';
576 qp += 2;
577 lnlen = 0;
578 continue;
581 if (lnlen > QP_LINESIZE - 3 - 1) {
582 qp[0] = '=';
583 qp[1] = '\n';
584 qp += 2;
585 lnlen = 0;
587 *qp++ = '=';
588 qp = _qp_ctohex(qp, c);
589 qp += 2;
590 lnlen += 3;
591 if (c != '\n' || !seenx)
592 seenx = (c == '\r');
593 else {
594 seenx = FAL0;
595 goto jsoftnl;
599 /* Enforce soft line break if we haven't seen LF */
600 if (in->l > 0 && *--is != '\n') {
601 qp[0] = '=';
602 qp[1] = '\n';
603 qp += 2;
605 jleave:
606 out->l = PTR2SIZE(qp - out->s);
607 out->s[out->l] = '\0';
608 NYD_LEAVE;
609 return out;
612 FL int
613 qp_decode(struct str *out, struct str const *in, struct str *rest)
615 int rv = STOP;
616 char *os, *oc;
617 char const *is, *ie;
618 NYD_ENTER;
620 if (rest != NULL && rest->l != 0) {
621 os = out->s;
622 *out = *rest;
623 rest->s = os;
624 rest->l = 0;
627 oc = os =
628 out->s = srealloc(out->s, out->l + in->l + 3);
629 oc += out->l;
630 is = in->s;
631 ie = is + in->l;
633 /* Decoding encoded-word (RFC 2049) in a header field? */
634 if (rest == NULL) {
635 while (is < ie) {
636 si32_t c = *is++;
637 if (c == '=') {
638 if (PTRCMP(is + 1, >=, ie)) {
639 ++is;
640 goto jehead;
642 c = _qp_cfromhex(is);
643 is += 2;
644 if (c >= 0)
645 *oc++ = (char)c;
646 else {
647 /* Invalid according to RFC 2045, section 6.7. Almost follow */
648 jehead:
649 /* TODO 0xFFFD
650 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
651 *oc += 3; 0xFFFD TODO
652 */ *oc++ = '?';
654 } else
655 *oc++ = (c == '_' /* US */) ? ' ' : (char)c;
657 goto jleave; /* XXX QP decode, header: errors not reported */
660 /* Decoding a complete message/mimepart body line */
661 while (is < ie) {
662 si32_t c = *is++;
663 if (c != '=') {
664 *oc++ = (char)c;
665 continue;
668 /* RFC 2045, 6.7:
669 * Therefore, when decoding a Quoted-Printable body, any
670 * trailing white space on a line must be deleted, as it will
671 * necessarily have been added by intermediate transport
672 * agents */
673 for (; is < ie && blankchar(*is); ++is)
675 if (PTRCMP(is + 1, >=, ie)) {
676 /* Soft line break? */
677 if (*is == '\n')
678 goto jsoftnl;
679 ++is;
680 goto jebody;
683 /* Not a soft line break? */
684 if (*is != '\n') {
685 c = _qp_cfromhex(is);
686 is += 2;
687 if (c >= 0)
688 *oc++ = (char)c;
689 else {
690 /* Invalid according to RFC 2045, section 6.7.
691 * Almost follow it and include the = and the follow char */
692 jebody:
693 /* TODO 0xFFFD
694 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
695 *oc += 3; 0xFFFD TODO
696 */ *oc++ = '?';
698 continue;
701 /* CRLF line endings are encoded as QP, followed by a soft line break, so
702 * check for this special case, and simply forget we have seen one, so as
703 * not to end up with the entire DOS file in a contiguous buffer */
704 jsoftnl:
705 if (oc > os && oc[-1] == '\n') {
706 #if 0 /* TODO qp_decode() we do not normalize CRLF
707 * TODO to LF because for that we would need
708 * TODO to know if we are about to write to
709 * TODO the display or do save the file!
710 * TODO 'hope the MIME/send layer rewrite will
711 * TODO offer the possibility to DTRT */
712 if (oc - 1 > os && oc[-2] == '\r') {
713 --oc;
714 oc[-1] = '\n';
716 #endif
717 break;
719 out->l = PTR2SIZE(oc - os);
720 rest->s = srealloc(rest->s, rest->l + out->l);
721 memcpy(rest->s + rest->l, out->s, out->l);
722 rest->l += out->l;
723 oc = os;
724 break;
726 /* XXX RFC: QP decode should check no trailing WS on line */
727 jleave:
728 out->l = PTR2SIZE(oc - os);
729 rv = OKAY;
730 NYD_LEAVE;
731 return rv;
734 FL size_t
735 b64_encode_calc_size(size_t len)
737 NYD_ENTER;
738 len = (len * 4) / 3;
739 len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
740 len += 2 + 1; /* CRLF, \0 */
741 NYD_LEAVE;
742 return len;
745 FL struct str *
746 b64_encode(struct str *out, struct str const *in, enum b64flags flags)
748 ui8_t const *p;
749 ssize_t i, lnlen;
750 char *b64;
751 NYD_ENTER;
753 assert(!(flags & B64_NOPAD) ||
754 !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
756 p = (ui8_t const*)in->s;
758 if (!(flags & B64_BUF)) {
759 i = b64_encode_calc_size(in->l);
760 out->s = (flags & B64_SALLOC) ? salloc(i) : srealloc(out->s, i);
762 b64 = out->s;
764 if (!(flags & (B64_CRLF | B64_LF)))
765 flags &= ~B64_MULTILINE;
767 for (lnlen = 0, i = (ssize_t)in->l; i > 0; p += 3, i -= 3) {
768 ui32_t a = p[0], b, c;
770 b64[0] = _b64_enctbl[a >> 2];
771 switch (i) {
772 case 1:
773 b64[1] = _b64_enctbl[((a & 0x3) << 4)];
774 b64[2] =
775 b64[3] = '=';
776 break;
777 case 2:
778 b = p[1];
779 b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
780 b64[2] = _b64_enctbl[((b & 0x0F) << 2)];
781 b64[3] = '=';
782 break;
783 default:
784 b = p[1];
785 c = p[2];
786 b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
787 b64[2] = _b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
788 b64[3] = _b64_enctbl[c & 0x3F];
789 break;
792 b64 += 4;
793 if (!(flags & B64_MULTILINE))
794 continue;
795 lnlen += 4;
796 if (lnlen < B64_LINESIZE)
797 continue;
799 lnlen = 0;
800 if (flags & B64_CRLF)
801 *b64++ = '\r';
802 if (flags & (B64_CRLF | B64_LF))
803 *b64++ = '\n';
806 if ((flags & (B64_CRLF | B64_LF)) &&
807 (!(flags & B64_MULTILINE) || lnlen != 0)) {
808 if (flags & B64_CRLF)
809 *b64++ = '\r';
810 if (flags & (B64_CRLF | B64_LF))
811 *b64++ = '\n';
812 } else if (flags & B64_NOPAD)
813 while (b64 != out->s && b64[-1] == '=')
814 --b64;
816 out->l = PTR2SIZE(b64 - out->s);
817 out->s[out->l] = '\0';
819 /* Base64 includes + and /, replace them with _ and -.
820 * This is base64url according to RFC 4648, then. Since we only support
821 * that for encoding and it is only used for boundary strings, this is
822 * yet a primitive implementation; xxx use tables; support decoding */
823 if (flags & B64_RFC4648URL) {
824 char c;
826 for (b64 = out->s; (c = *b64) != '\0'; ++b64)
827 if (c == '+')
828 *b64 = '-';
829 else if (c == '/')
830 *b64 = '_';
832 NYD_LEAVE;
833 return out;
836 FL struct str *
837 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
838 enum b64flags flags)
840 struct str in;
841 NYD_ENTER;
843 in.s = UNCONST(vp);
844 in.l = vp_len;
845 out = b64_encode(out, &in, flags);
846 NYD_LEAVE;
847 return out;
#ifdef HAVE_SMTP
/* b64_encode() for the NUL terminated string cp */
FL struct str *
b64_encode_cp(struct str *out, char const *cp, enum b64flags flags)
{
   struct str src;
   NYD_ENTER;

   src.s = UNCONST(cp);
   src.l = strlen(cp);
   out = b64_encode(out, &src, flags);

   NYD_LEAVE;
   return out;
}
#endif
865 FL int
866 b64_decode(struct str *out, struct str const *in, struct str *rest)
868 struct str work;
869 char *x;
870 size_t len;
871 int rv; /* XXX -> bool_t */
872 NYD_ENTER;
874 len = _b64_decode_prepare(&work, in);
875 out->l = 0;
877 /* TODO B64_T is different since we must not fail for errors; in v15.0 this
878 * TODO will be filter based and B64_T will have a different one than B64,
879 * TODO for now special treat this all-horror */
880 if (rest != NULL) {
881 /* With B64_T there may be leftover decoded data for iconv(3), even if
882 * that means it's incomplete multibyte character we have to copy over */
883 /* TODO strictly speaking this should not be handled in here,
884 * TODO since its leftover decoded data from an iconv(3);
885 * TODO In v15.0 this path will be filter based, each filter having its
886 * TODO own buffer for such purpose; for now we are BUSTED since for
887 * TODO Base64 rest is owned by iconv(3) */
888 if (rest->l > 0) {
889 x = out->s;
890 *out = *rest;
891 rest->s = x; /* Just for ownership reasons (all TODO in here..) */
892 rest->l = 0;
893 len += out->l;
896 out->s = srealloc(out->s, len +1);
898 for (;;) {
899 if (_b64_decode(out, &work) >= 0) {
900 if (work.l == 0)
901 break;
903 x = out->s + out->l;
905 /* Partial/False last sequence. TODO not solvable for non-EOF;
906 * TODO yes, invalid, but seen in the wild and should be handled,
907 * TODO but for that we had to have our v15.0 filter which doesn't
908 * TODO work line based but content buffer based */
909 if ((len = work.l) <= 4) {
910 switch (len) {
911 case 4: /* FALLTHRU */
912 case 3: x[2] = '?'; /* FALLTHRU */
913 case 2: x[1] = '?'; /* FALLTHRU */
914 default: x[0] = '?'; break;
916 out->l += len;
917 break;
920 /* TODO Bad content: this problem is not solvable! I've seen
921 * TODO messages which broke lines in the middle of a Base64
922 * TODO tuple, followed by an invalid character ("!"), the follow
923 * TODO line starting with whitespace and the remaining sequence.
924 * TODO OpenSSL bailed, mutt(1) got it right (silently..).
925 * TODO Since "rest" is not usable by us, we cannot continue
926 * TODO sequences. We will be able to do so with the v15.0 filter
927 * TODO approach, if we */
928 /* Bad content: skip over a single sequence */
929 for (;;) {
930 *x++ = '?';
931 ++out->l;
932 if (--work.l == 0)
933 break;
934 else {
935 ui8_t bc = (ui8_t)*++work.s;
936 ui32_t state = _B64_DECUI8(bc);
938 if (state != _B64_EQU && state != _B64_BAD)
939 break;
943 rv = OKAY;
944 goto jleave;
947 /* Ignore an empty input, as may happen for an empty final line */
948 if (work.l == 0) {
949 out->s = srealloc(out->s, 1);
950 rv = OKAY;
951 } else if (work.l >= 4 && !(work.l & 3)) {
952 out->s = srealloc(out->s, len +1);
953 if ((ssize_t)(len = _b64_decode(out, &work)) < 0)
954 goto jerr;
955 rv = OKAY;
956 } else
957 goto jerr;
959 jleave:
960 out->s[out->l] = '\0';
961 NYD_LEAVE;
962 return rv;
964 jerr: {
965 char const *err = _("[Invalid Base64 encoding]\n");
966 out->l = len = strlen(err);
967 out->s = srealloc(out->s, len +1);
968 memcpy(out->s, err, len);
969 rv = STOP;
970 goto jleave;
974 /* s-it-mode */