`write'++: !interactive: urlxenc() attachment paths (Ralph Corderoy)..
[s-mailx.git] / mime_enc.c
blob7d890df7553522691ed09f266ea7f9effb6a2f4e
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047):
3 *@ - Quoted-Printable, section 6.7
4 *@ - Base64, section 6.8
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
8 */
9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
12 * Copyright (c) 2006 The NetBSD Foundation, Inc.
13 * All rights reserved.
15 * This code is derived from software contributed to The NetBSD Foundation
16 * by Anon Ymous.
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
39 #undef n_FILE
40 #define n_FILE mime_enc
42 #ifndef HAVE_AMALGAMATION
43 # include "nail.h"
44 #endif
/* Quoting verdicts: stored in the lookup tables below and returned by
 * _mustquote().  The first group are plain actions / context dependent
 * classes, the second group re-uses the character itself as the value */
enum _qact {
   N = 0,      /* Do not quote */
   Q = 1,      /* Must quote */
   SP = 2,     /* sp */
   XF = 3,     /* Special character 'F' - maybe quoted */
   XD = 4,     /* Special character '.' - maybe quoted */
   UU = 5,     /* In header, _ must be quoted in encoded word */

   US = '_',   /* In header, ' ' must be quoted as _ in encoded word */
   QM = '?',   /* In header, special character ? not always quoted */
   EQ = '=',   /* In header, '=' must be quoted in encoded word */
   HT = '\t',  /* In body HT=SP, in head HT=HT, but quote in encoded word */

   NL = N,     /* Don't quote '\n' (NL) */
   CR = Q      /* Always quote a '\r' (CR) */
};
61 /* Lookup tables to decide whether a character must be encoded or not.
62 * Email header differences according to RFC 2047, section 4.2:
63 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
64 * - don't care about the special ^F[rom] and ^.$ */
65 static ui8_t const _qtab_body[] = {
66 Q, Q, Q, Q, Q, Q, Q, Q, Q,SP,NL, Q, Q,CR, Q, Q,
67 Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q,
68 SP, N, N, N, N, N, N, N, N, N, N, N, N, N,XD, N,
69 N, N, N, N, N, N, N, N, N, N, N, N, N, Q, N, N,
71 N, N, N, N, N, N,XF, N, N, N, N, N, N, N, N, N,
72 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
73 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
74 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q,
76 _qtab_head[] = {
77 Q, Q, Q, Q, Q, Q, Q, Q, Q,HT, Q, Q, Q, Q, Q, Q,
78 Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q,
79 US, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
80 N, N, N, N, N, N, N, N, N, N, N, N, N,EQ, N,QM,
82 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
83 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,UU,
84 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
85 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q,
/* Base64 alphabet, indexed by the 6-bit value */
static char const _b64_enctbl[] =
   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* The decoding table is only accessed via _B64_DECUI8().
 * It is indexed by the 7-bit character value and yields the 6-bit value,
 * -2 for the padding character '=' and -1 for anything else */
static signed char const _b64__dectbl[] = {
   /* 0x00 - 0x0F */
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x10 - 0x1F */
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x20 - 0x2F: '+' and '/' */
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
   /* 0x30 - 0x3F: digits, '=' */
   52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
   /* 0x40 - 0x4F */
   -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
   /* 0x50 - 0x5F */
   15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
   /* 0x60 - 0x6F */
   -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
   /* 0x70 - 0x7F */
   41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* Results of _B64_DECUI8() for '=' and for invalid characters, respectively.
 * The expansions are fully parenthesized so that they stay a single operand
 * in whatever expression they are used */
#define _B64_EQU        ((ui32_t)-2)
#define _B64_BAD        ((ui32_t)-1)

/* Map the character C to its Base64 value, _B64_EQU or _B64_BAD; anything
 * beyond the table is _B64_BAD.  Note C is evaluated twice */
#define _B64_DECUI8(C)  \
   ((C) >= sizeof(_b64__dectbl) ? _B64_BAD : (ui32_t)_b64__dectbl[(ui8_t)(C)])
106 /* ASCII case-insensitive check whether Content-Transfer-Encoding: header body
107 * hbody defined this encoding type */
108 static bool_t _is_ct_enc(char const *hbody, char const *encoding);
110 /* Check whether *s must be quoted according to flags, else body rules;
111 * sol indicates whether we are at the first character of a line/field */
112 SINLINE enum _qact _mustquote(char const *s, char const *e, bool_t sol,
113 enum mime_enc_flags flags);
115 /* Trim WS and make work point to the decodable range of in*
116 * Return the amount of bytes a b64_decode operation on that buffer requires */
117 static size_t _b64_decode_prepare(struct str *work,
118 struct str const *in);
120 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base in(put).
121 * Return number of useful bytes in out or -1 on error */
122 static ssize_t _b64_decode(struct str *out, struct str *in);
124 static bool_t
125 _is_ct_enc(char const *hbody, char const *encoding)
127 bool_t quoted, rv;
128 int c;
129 NYD2_ENTER;
131 if (*hbody == '"')
132 quoted = TRU1, ++hbody;
133 else
134 quoted = FAL0;
135 rv = FAL0;
137 while (*hbody != '\0' && *encoding != '\0')
138 if ((c = *hbody++, lowerconv(c) != *encoding++))
139 goto jleave;
140 rv = TRU1;
142 if (quoted && *hbody == '"')
143 goto jleave;
144 if (*hbody == '\0' || whitechar(*hbody))
145 goto jleave;
146 rv = FAL0;
147 jleave:
148 NYD2_LEAVE;
149 return rv;
152 SINLINE enum _qact
153 _mustquote(char const *s, char const *e, bool_t sol, enum mime_enc_flags flags)
155 ui8_t const *qtab;
156 enum _qact a, r;
157 NYD2_ENTER;
159 qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
160 ? _qtab_head : _qtab_body;
161 a = ((ui8_t)*s > 0x7F) ? Q : qtab[(ui8_t)*s];
163 if ((r = a) == N || (r = a) == Q)
164 goto jleave;
165 r = Q;
167 /* Special header fields */
168 if (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)) {
169 /* Special massage for encoded words */
170 if (flags & MIMEEF_ISENCWORD) {
171 switch (a) {
172 case HT:
173 case US:
174 case EQ:
175 r = a;
176 /* FALLTHRU */
177 case UU:
178 goto jleave;
179 default:
180 break;
184 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
185 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
186 * should be hard too match */
187 if (a == QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
188 goto jleave;
189 goto jnquote;
192 /* Body-only */
194 if (a == SP) {
195 /* WS only if trailing white space */
196 if (PTRCMP(s + 1, ==, e) || s[1] == '\n')
197 goto jleave;
198 goto jnquote;
201 /* Rest are special begin-of-line cases */
202 if (!sol)
203 goto jnquote;
205 /* ^From */
206 if (a == XF) {
207 if (PTRCMP(s + 4, <, e) && s[1] == 'r' && s[2] == 'o' && s[3] == 'm')
208 goto jleave;
209 goto jnquote;
211 /* ^.$ */
212 if (a == XD && (PTRCMP(s + 1, ==, e) || s[1] == '\n'))
213 goto jleave;
214 jnquote:
215 r = N;
216 jleave:
217 NYD2_LEAVE;
218 return r;
221 static size_t
222 _b64_decode_prepare(struct str *work, struct str const *in)
224 char *cp;
225 size_t cp_len;
226 NYD2_ENTER;
228 cp = in->s;
229 cp_len = in->l;
231 while (cp_len > 0 && spacechar(*cp))
232 ++cp, --cp_len;
233 work->s = cp;
235 for (cp += cp_len; cp_len > 0; --cp_len) {
236 char c = *--cp;
237 if (!spacechar(c))
238 break;
240 work->l = cp_len;
242 if (cp_len > 16)
243 cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
244 cp_len += 2*3 +1;
245 NYD2_LEAVE;
246 return cp_len;
249 static ssize_t
250 _b64_decode(struct str *out, struct str *in)
252 ssize_t rv = -1;
253 ui8_t *p;
254 ui8_t const *q, *end;
255 NYD2_ENTER;
257 p = (ui8_t*)out->s + out->l;
258 q = (ui8_t const*)in->s;
260 for (end = q + in->l; PTRCMP(q + 4, <=, end);) {
261 ui32_t a = _B64_DECUI8(q[0]), b = _B64_DECUI8(q[1]),
262 c = _B64_DECUI8(q[2]), d = _B64_DECUI8(q[3]);
263 q += 4;
265 if (a >= _B64_EQU || b >= _B64_EQU || c == _B64_BAD || d == _B64_BAD)
266 goto jleave;
268 *p++ = ((a << 2) | ((b & 0x30) >> 4));
269 if (c == _B64_EQU) { /* got '=' */
270 if (d != _B64_EQU)
271 goto jleave;
272 break;
274 *p++ = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
275 if (d == _B64_EQU) /* got '=' */
276 break;
277 *p++ = (((c & 0x03) << 6) | d);
279 rv ^= rv;
281 jleave: {
282 size_t i = PTR2SIZE((char*)p - out->s);
283 out->l = i;
284 if (rv == 0)
285 rv = (ssize_t)i;
287 in->l -= PTR2SIZE((char*)UNCONST(q) - in->s);
288 in->s = UNCONST(q);
289 NYD2_LEAVE;
290 return rv;
293 FL enum mime_enc
294 mime_enc_target(void)
296 char const *cp;
297 enum mime_enc rv;
298 NYD2_ENTER;
300 if ((cp = ok_vlook(encoding)) == NULL)
301 rv = MIME_DEFAULT_ENCODING;
302 else if (!asccasecmp(cp, "quoted-printable"))
303 rv = MIMEE_QP;
304 else if (!asccasecmp(cp, "8bit"))
305 rv = MIMEE_8B;
306 else if (!asccasecmp(cp, "base64"))
307 rv = MIMEE_B64;
308 else {
309 n_err(_("Warning: invalid *encoding*, using Base64: %s\n"), cp);
310 rv = MIMEE_B64;
312 NYD2_LEAVE;
313 return rv;
316 FL enum mime_enc
317 mime_enc_from_ctehead(char const *hbody)
319 enum mime_enc rv;
320 NYD2_ENTER;
322 if (hbody == NULL || _is_ct_enc(hbody, "7bit"))
323 rv = MIMEE_7B;
324 else if (_is_ct_enc(hbody, "8bit"))
325 rv = MIMEE_8B;
326 else if (_is_ct_enc(hbody, "base64"))
327 rv = MIMEE_B64;
328 else if (_is_ct_enc(hbody, "binary"))
329 rv = MIMEE_BIN;
330 else if (_is_ct_enc(hbody, "quoted-printable"))
331 rv = MIMEE_QP;
332 else
333 rv = MIMEE_NONE;
334 NYD2_LEAVE;
335 return rv;
338 FL char const *
339 mime_enc_from_conversion(enum conversion const convert) /* TODO booom */
341 char const *rv;
342 NYD_ENTER;
344 switch (convert) {
345 case CONV_7BIT: rv = "7bit"; break;
346 case CONV_8BIT: rv = "8bit"; break;
347 case CONV_TOQP: rv = "quoted-printable"; break;
348 case CONV_TOB64: rv = "base64"; break;
349 default: rv = ""; break;
351 NYD_LEAVE;
352 return rv;
355 FL size_t
356 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags)
358 size_t rv;
359 bool_t sol;
360 NYD_ENTER;
362 for (rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
363 switch (_mustquote(ln, ln + lnlen, sol, flags)) {
364 case US:
365 case EQ:
366 case HT:
367 assert(flags & MIMEEF_ISENCWORD);
368 /* FALLTHRU */
369 case N:
370 continue;
371 default:
372 ++rv;
374 NYD_LEAVE;
375 return rv;
378 FL size_t
379 qp_encode_calc_size(size_t len)
381 size_t bytes, lines;
382 NYD_ENTER;
384 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
385 * However, we must be aware that (a) the output may span multiple lines
386 * and (b) the input does not end with a newline itself (nonetheless):
387 * LC_ALL=C awk 'BEGIN{
388 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
389 * }' |
390 * MAILRC=/dev/null LC_ALL=en_US.UTF-8 s-nail -nvvd \
391 * -Ssendcharsets=utf8 -s testsub ./LETTER */
392 bytes = len * 3;
393 lines = bytes / QP_LINESIZE;
394 len += lines;
396 bytes = len * 3;
397 /* Trailing hard NL may be missing, so there may be two lines.
398 * Thus add soft + hard NL per line and a trailing NUL */
399 lines = (bytes / QP_LINESIZE) + 1;
400 lines <<= 1;
401 bytes += lines;
402 len = ++bytes;
404 NYD_LEAVE;
405 return len;
#ifdef notyet
/* qp_encode() for the NUL terminated string cp */
FL struct str *
qp_encode_cp(struct str *out, char const *cp, enum qpflags flags)
{
   struct str in;
   NYD_ENTER;

   in.l = strlen(cp);
   in.s = UNCONST(cp);
   out = qp_encode(out, &in, flags);
   NYD_LEAVE;
   return out;
}

/* qp_encode() for the vp_len bytes at vp */
FL struct str *
qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
   enum qpflags flags)
{
   struct str in;
   NYD_ENTER;

   in.l = vp_len;
   in.s = UNCONST(vp);
   out = qp_encode(out, &in, flags);
   NYD_LEAVE;
   return out;
}
#endif /* notyet */
437 FL struct str *
438 qp_encode(struct str *out, struct str const *in, enum qpflags flags)
440 bool_t sol = (flags & QP_ISHEAD ? FAL0 : TRU1), seenx;
441 ssize_t lnlen;
442 char *qp;
443 char const *is, *ie;
444 NYD_ENTER;
446 if (!(flags & QP_BUF)) {
447 lnlen = qp_encode_calc_size(in->l);
448 out->s = (flags & QP_SALLOC) ? salloc(lnlen) : srealloc(out->s, lnlen);
450 qp = out->s;
451 is = in->s;
452 ie = is + in->l;
454 /* QP_ISHEAD? */
455 if (!sol) {
456 enum mime_enc_flags ef = MIMEEF_ISHEAD |
457 (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
459 for (seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp) {
460 enum _qact mq = _mustquote(is, ie, sol, ef);
461 char c = *is++;
463 if (mq == N) {
464 /* We convert into a single *encoded-word*, that'll end up in
465 * =?C?Q??=; quote '?' from when we're inside there on */
466 if (seenx && c == '?')
467 goto jheadq;
468 *qp = c;
469 } else if (mq == US)
470 *qp = US;
471 else {
472 seenx = TRU1;
473 jheadq:
474 *qp++ = '=';
475 qp = n_c_to_hex_base16(qp, c) + 1;
478 goto jleave;
481 /* The body needs to take care for soft line breaks etc. */
482 for (lnlen = 0, seenx = FAL0; is < ie; sol = FAL0) {
483 enum _qact mq = _mustquote(is, ie, sol, MIMEEF_NONE);
484 char c = *is++;
486 if (mq == N && (c != '\n' || !seenx)) {
487 *qp++ = c;
488 if (++lnlen < QP_LINESIZE - 1)
489 continue;
490 /* Don't write a soft line break when we're in the last possible
491 * column and either an LF has been written or only an LF follows, as
492 * that'll end the line anyway */
493 /* XXX but - ensure is+1>=ie, then??
494 * xxx and/or - what about resetting lnlen; that contra
495 * xxx dicts input==1 input line assertion, though */
496 if (c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
497 continue;
498 jsoftnl:
499 qp[0] = '=';
500 qp[1] = '\n';
501 qp += 2;
502 lnlen = 0;
503 continue;
506 if (lnlen > QP_LINESIZE - 3 - 1) {
507 qp[0] = '=';
508 qp[1] = '\n';
509 qp += 2;
510 lnlen = 0;
512 *qp++ = '=';
513 qp = n_c_to_hex_base16(qp, c);
514 qp += 2;
515 lnlen += 3;
516 if (c != '\n' || !seenx)
517 seenx = (c == '\r');
518 else {
519 seenx = FAL0;
520 goto jsoftnl;
524 /* Enforce soft line break if we haven't seen LF */
525 if (in->l > 0 && *--is != '\n') {
526 qp[0] = '=';
527 qp[1] = '\n';
528 qp += 2;
530 jleave:
531 out->l = PTR2SIZE(qp - out->s);
532 out->s[out->l] = '\0';
533 NYD_LEAVE;
534 return out;
537 FL int
538 qp_decode(struct str *out, struct str const *in, struct str *rest)
540 int rv = STOP;
541 char *os, *oc;
542 char const *is, *ie;
543 NYD_ENTER;
545 if (rest != NULL && rest->l != 0) {
546 os = out->s;
547 *out = *rest;
548 rest->s = os;
549 rest->l = 0;
552 oc = os =
553 out->s = srealloc(out->s, out->l + in->l + 3);
554 oc += out->l;
555 is = in->s;
556 ie = is + in->l;
558 /* Decoding encoded-word (RFC 2049) in a header field? */
559 if (rest == NULL) {
560 while (is < ie) {
561 si32_t c = *is++;
562 if (c == '=') {
563 if (PTRCMP(is + 1, >=, ie)) {
564 ++is;
565 goto jehead;
567 c = n_c_from_hex_base16(is);
568 is += 2;
569 if (c >= 0)
570 *oc++ = (char)c;
571 else {
572 /* Invalid according to RFC 2045, section 6.7. Almost follow */
573 jehead:
574 /* TODO 0xFFFD
575 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
576 *oc += 3; 0xFFFD TODO
577 */ *oc++ = '?';
579 } else
580 *oc++ = (c == '_' /* US */) ? ' ' : (char)c;
582 goto jleave; /* XXX QP decode, header: errors not reported */
585 /* Decoding a complete message/mimepart body line */
586 while (is < ie) {
587 si32_t c = *is++;
588 if (c != '=') {
589 *oc++ = (char)c;
590 continue;
593 /* RFC 2045, 6.7:
594 * Therefore, when decoding a Quoted-Printable body, any
595 * trailing white space on a line must be deleted, as it will
596 * necessarily have been added by intermediate transport
597 * agents */
598 for (; is < ie && blankchar(*is); ++is)
600 if (PTRCMP(is + 1, >=, ie)) {
601 /* Soft line break? */
602 if (*is == '\n')
603 goto jsoftnl;
604 ++is;
605 goto jebody;
608 /* Not a soft line break? */
609 if (*is != '\n') {
610 c = n_c_from_hex_base16(is);
611 is += 2;
612 if (c >= 0)
613 *oc++ = (char)c;
614 else {
615 /* Invalid according to RFC 2045, section 6.7.
616 * Almost follow it and include the = and the follow char */
617 jebody:
618 /* TODO 0xFFFD
619 *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
620 *oc += 3; 0xFFFD TODO
621 */ *oc++ = '?';
623 continue;
626 /* CRLF line endings are encoded as QP, followed by a soft line break, so
627 * check for this special case, and simply forget we have seen one, so as
628 * not to end up with the entire DOS file in a contiguous buffer */
629 jsoftnl:
630 if (oc > os && oc[-1] == '\n') {
631 #if 0 /* TODO qp_decode() we do not normalize CRLF
632 * TODO to LF because for that we would need
633 * TODO to know if we are about to write to
634 * TODO the display or do save the file!
635 * TODO 'hope the MIME/send layer rewrite will
636 * TODO offer the possibility to DTRT */
637 if (oc - 1 > os && oc[-2] == '\r') {
638 --oc;
639 oc[-1] = '\n';
641 #endif
642 break;
644 out->l = PTR2SIZE(oc - os);
645 rest->s = srealloc(rest->s, rest->l + out->l);
646 memcpy(rest->s + rest->l, out->s, out->l);
647 rest->l += out->l;
648 oc = os;
649 break;
651 /* XXX RFC: QP decode should check no trailing WS on line */
652 jleave:
653 out->l = PTR2SIZE(oc - os);
654 rv = OKAY;
655 NYD_LEAVE;
656 return rv;
659 FL size_t
660 b64_encode_calc_size(size_t len)
662 NYD_ENTER;
663 len = (len * 4) / 3;
664 len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
665 len += 2 + 1; /* CRLF, \0 */
666 NYD_LEAVE;
667 return len;
670 FL struct str *
671 b64_encode(struct str *out, struct str const *in, enum b64flags flags)
673 ui8_t const *p;
674 ssize_t i, lnlen;
675 char *b64;
676 NYD_ENTER;
678 assert(!(flags & B64_NOPAD) ||
679 !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
681 p = (ui8_t const*)in->s;
683 if (!(flags & B64_BUF)) {
684 i = b64_encode_calc_size(in->l);
685 out->s = (flags & B64_SALLOC) ? salloc(i) : srealloc(out->s, i);
687 b64 = out->s;
689 if (!(flags & (B64_CRLF | B64_LF)))
690 flags &= ~B64_MULTILINE;
692 for (lnlen = 0, i = (ssize_t)in->l; i > 0; p += 3, i -= 3) {
693 ui32_t a = p[0], b, c;
695 b64[0] = _b64_enctbl[a >> 2];
696 switch (i) {
697 case 1:
698 b64[1] = _b64_enctbl[((a & 0x3) << 4)];
699 b64[2] =
700 b64[3] = '=';
701 break;
702 case 2:
703 b = p[1];
704 b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
705 b64[2] = _b64_enctbl[((b & 0x0F) << 2)];
706 b64[3] = '=';
707 break;
708 default:
709 b = p[1];
710 c = p[2];
711 b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
712 b64[2] = _b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
713 b64[3] = _b64_enctbl[c & 0x3F];
714 break;
717 b64 += 4;
718 if (!(flags & B64_MULTILINE))
719 continue;
720 lnlen += 4;
721 if (lnlen < B64_LINESIZE)
722 continue;
724 lnlen = 0;
725 if (flags & B64_CRLF)
726 *b64++ = '\r';
727 if (flags & (B64_CRLF | B64_LF))
728 *b64++ = '\n';
731 if ((flags & (B64_CRLF | B64_LF)) &&
732 (!(flags & B64_MULTILINE) || lnlen != 0)) {
733 if (flags & B64_CRLF)
734 *b64++ = '\r';
735 if (flags & (B64_CRLF | B64_LF))
736 *b64++ = '\n';
737 } else if (flags & B64_NOPAD)
738 while (b64 != out->s && b64[-1] == '=')
739 --b64;
741 out->l = PTR2SIZE(b64 - out->s);
742 out->s[out->l] = '\0';
744 /* Base64 includes + and /, replace them with _ and -.
745 * This is base64url according to RFC 4648, then. Since we only support
746 * that for encoding and it is only used for boundary strings, this is
747 * yet a primitive implementation; xxx use tables; support decoding */
748 if (flags & B64_RFC4648URL) {
749 char c;
751 for (b64 = out->s; (c = *b64) != '\0'; ++b64)
752 if (c == '+')
753 *b64 = '-';
754 else if (c == '/')
755 *b64 = '_';
757 NYD_LEAVE;
758 return out;
761 FL struct str *
762 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
763 enum b64flags flags)
765 struct str in;
766 NYD_ENTER;
768 in.s = UNCONST(vp);
769 in.l = vp_len;
770 out = b64_encode(out, &in, flags);
771 NYD_LEAVE;
772 return out;
#ifdef HAVE_SMTP
/* b64_encode() for the NUL terminated string cp */
FL struct str *
b64_encode_cp(struct str *out, char const *cp, enum b64flags flags)
{
   struct str in;
   NYD_ENTER;

   in.l = strlen(cp);
   in.s = UNCONST(cp);
   out = b64_encode(out, &in, flags);
   NYD_LEAVE;
   return out;
}
#endif
790 FL int
791 b64_decode(struct str *out, struct str const *in, struct str *rest)
793 struct str work;
794 char *x;
795 size_t len;
796 int rv; /* XXX -> bool_t */
797 NYD_ENTER;
799 len = _b64_decode_prepare(&work, in);
800 out->l = 0;
802 /* TODO B64_T is different since we must not fail for errors; in v15.0 this
803 * TODO will be filter based and B64_T will have a different one than B64,
804 * TODO for now special treat this all-horror */
805 if (rest != NULL) {
806 /* With B64_T there may be leftover decoded data for iconv(3), even if
807 * that means it's incomplete multibyte character we have to copy over */
808 /* TODO strictly speaking this should not be handled in here,
809 * TODO since its leftover decoded data from an iconv(3);
810 * TODO In v15.0 this path will be filter based, each filter having its
811 * TODO own buffer for such purpose; for now we are BUSTED since for
812 * TODO Base64 rest is owned by iconv(3) */
813 if (rest->l > 0) {
814 x = out->s;
815 *out = *rest;
816 rest->s = x; /* Just for ownership reasons (all TODO in here..) */
817 rest->l = 0;
818 len += out->l;
821 out->s = srealloc(out->s, len +1);
823 for (;;) {
824 if (_b64_decode(out, &work) >= 0) {
825 if (work.l == 0)
826 break;
828 x = out->s + out->l;
830 /* Partial/False last sequence. TODO not solvable for non-EOF;
831 * TODO yes, invalid, but seen in the wild and should be handled,
832 * TODO but for that we had to have our v15.0 filter which doesn't
833 * TODO work line based but content buffer based */
834 if ((len = work.l) <= 4) {
835 switch (len) {
836 case 4: /* FALLTHRU */
837 case 3: x[2] = '?'; /* FALLTHRU */
838 case 2: x[1] = '?'; /* FALLTHRU */
839 default: x[0] = '?'; break;
841 out->l += len;
842 break;
845 /* TODO Bad content: this problem is not solvable! I've seen
846 * TODO messages which broke lines in the middle of a Base64
847 * TODO tuple, followed by an invalid character ("!"), the follow
848 * TODO line starting with whitespace and the remaining sequence.
849 * TODO OpenSSL bailed, mutt(1) got it right (silently..).
850 * TODO Since "rest" is not usable by us, we cannot continue
851 * TODO sequences. We will be able to do so with the v15.0 filter
852 * TODO approach, if we */
853 /* Bad content: skip over a single sequence */
854 for (;;) {
855 *x++ = '?';
856 ++out->l;
857 if (--work.l == 0)
858 break;
859 else {
860 ui8_t bc = (ui8_t)*++work.s;
861 ui32_t state = _B64_DECUI8(bc);
863 if (state != _B64_EQU && state != _B64_BAD)
864 break;
868 rv = OKAY;
869 goto jleave;
872 /* Ignore an empty input, as may happen for an empty final line */
873 if (work.l == 0) {
874 out->s = srealloc(out->s, 1);
875 rv = OKAY;
876 } else if (work.l >= 4 && !(work.l & 3)) {
877 out->s = srealloc(out->s, len +1);
878 if ((ssize_t)(len = _b64_decode(out, &work)) < 0)
879 goto jerr;
880 rv = OKAY;
881 } else
882 goto jerr;
884 jleave:
885 out->s[out->l] = '\0';
886 NYD_LEAVE;
887 return rv;
889 jerr: {
890 char const *err = _("[Invalid Base64 encoding]\n");
891 out->l = len = strlen(err);
892 out->s = srealloc(out->s, len +1);
893 memcpy(out->s, err, len);
894 rv = STOP;
895 goto jleave;
899 /* s-it-mode */