make-config.in: complete path (leftover of [807f64e2], 2015-12-26!)
[s-mailx.git] / mime-enc.c
blobe1c05c95636a0ebd8af99d784c58690ea82ca5c0
/*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047;
 *@ for _header() versions: including "encoded word" as of RFC 2049):
 *@ - Quoted-Printable, section 6.7
 *@ - Base64, section 6.8
 *@ QP quoting and _b64_decode(), b64_encode() inspired from NetBSDs mailx(1):
 *@   $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $
 *@ TODO We have no notion of a "current message context" and thus badly log.
 *@ TODO This is not final yet, v15 will bring "filters".
 *
 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
 * SPDX-License-Identifier: ISC
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
26 #undef n_FILE
27 #define n_FILE mime_enc
29 #ifndef HAVE_AMALGAMATION
30 # include "nail.h"
31 #endif
/* Quoting classes: these are the values stored in the QP lookup tables and
 * returned by a_me_mustquote().
 * Note that a_ME_NL shares the value of a_ME_N, and a_ME_CR that of a_ME_Q */
enum a_me_qact{
   a_ME_N = 0,       /* No quoting necessary */
   a_ME_Q = 1,       /* Must quote */
   a_ME_SP = 2,      /* sp */
   a_ME_XF = 3,      /* Special character 'F' - maybe quoted */
   a_ME_XD = 4,      /* Special character '.' - maybe quoted */
   a_ME_UU = 5,      /* In header, _ must be quoted in encoded word */
   a_ME_US = '_',    /* In header, ' ' must be quoted as _ in encoded word */
   a_ME_QM = '?',    /* In header, special character ? not always quoted */
   a_ME_EQ = '=',    /* In header, '=' must be quoted in encoded word */
   a_ME_HT = '\t',   /* Body HT=SP.  Head HT=HT, BUT quote in encoded word */
   a_ME_NL = 0,      /* Don't quote '\n' (NL) */
   a_ME_CR = a_ME_Q  /* Always quote a '\r' (CR) */
};
48 /* Lookup tables to decide whether a character must be encoded or not.
49 * Email header differences according to RFC 2047, section 4.2:
50 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
51 * - don't care about the special ^F[rom] and ^.$ */
52 static ui8_t const a_me_qp_body[] = {
53 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
54 a_ME_Q, a_ME_SP, a_ME_NL, a_ME_Q, a_ME_Q, a_ME_CR, a_ME_Q, a_ME_Q,
55 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
56 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
57 a_ME_SP, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
58 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XD, a_ME_N,
59 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
60 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q, a_ME_N, a_ME_N,
62 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XF, a_ME_N,
63 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
64 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
65 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
66 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
67 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
68 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
69 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
70 }, a_me_qp_head[] = {
71 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
72 a_ME_Q, a_ME_HT, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
73 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
74 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
75 a_ME_US, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
76 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
77 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
78 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_EQ, a_ME_N, a_ME_QM,
80 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
81 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
82 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
83 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_UU,
84 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
85 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
86 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
87 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
/* Base64 alphabet, indexed by the 6-bit value to encode */
static char const a_me_b64_enctbl[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";

/* The decoding table is only accessed via a_ME_B64_DECUI8().
 * It maps a 7-bit byte to its 6-bit value, -2 marks the padding character
 * '=', -1 marks any byte that is not part of the alphabet */
static signed char const a_me_b64__dectbl[] = {
   /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
   /* 0x30 */ 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
   /* 0x40 */ -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
   /* 0x50 */ 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
   /* 0x60 */ -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
   /* 0x70 */ 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* The expansions are fully parenthesized so that the casts cannot bind to
 * anything which follows a use of the macro */
#define a_ME_B64_EQU ((ui32_t)-2)
#define a_ME_B64_BAD ((ui32_t)-1)
/* Decode one byte; bytes beyond the table size decode to a_ME_B64_BAD.
 * Beware: C is evaluated twice */
#define a_ME_B64_DECUI8(C) \
   ((ui8_t)(C) >= sizeof(a_me_b64__dectbl)\
    ? a_ME_B64_BAD : (ui32_t)a_me_b64__dectbl[(ui8_t)(C)])
/* Names of the Content-Transfer-Encodings, packed into one buffer of NUL
 * terminated strings; the enumeration gives offset and length (excluding the
 * NUL) of each name (ugly to place an enum here) */
static char const a_me_ctes[] = "7bit\0" "8bit\0"
      "base64\0" "quoted-printable\0" "binary\0"
      /* abbrevs */ "8b\0" "b64\0" "qp\0";
enum a_me_ctes_off{
   a_ME_CTES_7B_OFF = 0, a_ME_CTES_7B_LEN = 4,
   a_ME_CTES_8B_OFF = 5, a_ME_CTES_8B_LEN = 4,
   a_ME_CTES_B64_OFF = 10, a_ME_CTES_B64_LEN = 6,
   a_ME_CTES_QP_OFF = 17, a_ME_CTES_QP_LEN = 16,
   a_ME_CTES_BIN_OFF = 34, a_ME_CTES_BIN_LEN = 6,

   /* Abbreviations */
   a_ME_CTES_S8B_OFF = 41, a_ME_CTES_S8B_LEN = 2,
   a_ME_CTES_SB64_OFF = 44, a_ME_CTES_SB64_LEN = 3,
   a_ME_CTES_SQP_OFF = 48, a_ME_CTES_SQP_LEN = 2
};
125 /* Check whether *s must be quoted according to flags, else body rules;
126 * sol indicates whether we are at the first character of a line/field */
127 n_INLINE enum a_me_qact a_me_mustquote(char const *s, char const *e,
128 bool_t sol, enum mime_enc_flags flags);
130 /* Trim WS and make work point to the decodable range of in.
131 * Return the amount of bytes a b64_decode operation on that buffer requires,
132 * or UIZ_MAX on overflow error */
133 static size_t a_me_b64_decode_prepare(struct str *work, struct str const *in);
135 /* Perform b64_decode on in(put) to sufficiently spaced out(put).
136 * Return number of useful bytes in out or -1 on error.
137 * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
138 static ssize_t a_me_b64_decode(struct str *out, struct str *in);
140 n_INLINE enum a_me_qact
141 a_me_mustquote(char const *s, char const *e, bool_t sol,
142 enum mime_enc_flags flags){
143 ui8_t const *qtab;
144 enum a_me_qact a, r;
145 NYD2_ENTER;
147 qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
148 ? a_me_qp_head : a_me_qp_body;
150 if((ui8_t)*s > 0x7F){
151 r = a_ME_Q;
152 goto jleave;
155 a = qtab[(ui8_t)*s];
157 if((r = a) == a_ME_N || a == a_ME_Q)
158 goto jleave;
160 r = a_ME_Q;
162 /* Special header fields */
163 if(flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)){
164 /* Special massage for encoded words */
165 if(flags & MIMEEF_ISENCWORD){
166 switch(a){
167 case a_ME_HT:
168 case a_ME_US:
169 case a_ME_EQ:
170 r = a;
171 /* FALLTHRU */
172 case a_ME_UU:
173 goto jleave;
174 default:
175 break;
179 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
180 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
181 * should be hard to match */
182 if(a == a_ME_QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
183 goto jleave;
184 goto jnquote;
187 /* Body-only */
189 if(a == a_ME_SP){
190 /* WS only if trailing white space */
191 if(&s[1] == e || s[1] == '\n')
192 goto jleave;
193 goto jnquote;
196 /* Rest are special begin-of-line cases */
197 if(!sol)
198 goto jnquote;
200 /* ^From */
201 if(a == a_ME_XF){
202 if(&s[4] < e && s[1] == 'r' && s[2] == 'o' && s[3] == 'm' && s[4] == ' ')
203 goto jleave;
204 goto jnquote;
206 /* ^.$ */
207 if(a == a_ME_XD && (&s[1] == e || s[1] == '\n'))
208 goto jleave;
209 jnquote:
210 r = 0;
211 jleave:
212 NYD2_LEAVE;
213 return r;
216 static size_t
217 a_me_b64_decode_prepare(struct str *work, struct str const *in){
218 size_t cp_len;
219 NYD2_ENTER;
221 *work = *in;
222 cp_len = n_str_trim(work, n_STR_TRIM_BOTH)->l;
224 if(cp_len > 16){
225 /* n_ERR_OVERFLOW */
226 if(UIZ_MAX / 3 <= cp_len){
227 cp_len = UIZ_MAX;
228 goto jleave;
230 cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
232 cp_len += (2 * 3) +1;
233 jleave:
234 NYD2_LEAVE;
235 return cp_len;
238 static ssize_t
239 a_me_b64_decode(struct str *out, struct str *in){
240 ui8_t *p, pb;
241 ui8_t const *q, *end;
242 ssize_t rv;
243 NYD2_ENTER;
245 rv = -1;
246 p = (ui8_t*)&out->s[out->l];
247 q = (ui8_t const*)in->s;
249 for(end = &q[in->l]; PTR2SIZE(end - q) >= 4; q += 4){
250 ui32_t a, b, c, d;
252 a = a_ME_B64_DECUI8(q[0]);
253 b = a_ME_B64_DECUI8(q[1]);
254 c = a_ME_B64_DECUI8(q[2]);
255 d = a_ME_B64_DECUI8(q[3]);
257 if(n_UNLIKELY(a >= a_ME_B64_EQU || b >= a_ME_B64_EQU ||
258 c == a_ME_B64_BAD || d == a_ME_B64_BAD))
259 goto jleave;
261 pb = ((a << 2) | ((b & 0x30) >> 4));
262 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
263 *p++ = pb;
265 if(c == a_ME_B64_EQU){ /* got '=' */
266 q += 4;
267 if(n_UNLIKELY(d != a_ME_B64_EQU))
268 goto jleave;
269 break;
272 pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
273 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
274 *p++ = pb;
276 if(d == a_ME_B64_EQU) /* got '=' */
277 break;
278 pb = (((c & 0x03) << 6) | d);
279 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
280 *p++ = pb;
282 rv ^= rv;
284 jleave:{
285 size_t i;
287 i = PTR2SIZE((char*)p - out->s);
288 out->l = i;
289 if(rv == 0)
290 rv = (ssize_t)i;
292 in->l -= PTR2SIZE(q - (ui8_t*)in->s);
293 in->s = n_UNCONST(q);
294 NYD2_LEAVE;
295 return rv;
298 FL enum mime_enc
299 mime_enc_target(void){
300 char const *cp, *v15;
301 enum mime_enc rv;
302 NYD2_ENTER;
304 if((v15 = ok_vlook(encoding)) != NULL)
305 n_OBSOLETE(_("please use *mime-encoding* instead of *encoding*"));
307 if((cp = ok_vlook(mime_encoding)) == NULL && (cp = v15) == NULL)
308 rv = MIME_DEFAULT_ENCODING;
309 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_S8B_OFF]) ||
310 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_8B_OFF]))
311 rv = MIMEE_8B;
312 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SB64_OFF]) ||
313 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_B64_OFF]))
314 rv = MIMEE_B64;
315 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SQP_OFF]) ||
316 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_QP_OFF]))
317 rv = MIMEE_QP;
318 else{
319 n_err(_("Warning: invalid *mime-encoding*, using Base64: %s\n"), cp);
320 rv = MIMEE_B64;
322 NYD2_LEAVE;
323 return rv;
326 FL enum mime_enc
327 mime_enc_from_ctehead(char const *hbody){
328 enum mime_enc rv;
329 NYD2_ENTER;
331 if(hbody == NULL)
332 rv = MIMEE_7B;
333 else{
334 struct{
335 ui8_t off;
336 ui8_t len;
337 ui8_t enc;
338 ui8_t __dummy;
339 } const *cte, cte_base[] = {
340 {a_ME_CTES_7B_OFF, a_ME_CTES_7B_LEN, MIMEE_7B, 0},
341 {a_ME_CTES_8B_OFF, a_ME_CTES_8B_LEN, MIMEE_8B, 0},
342 {a_ME_CTES_B64_OFF, a_ME_CTES_B64_LEN, MIMEE_B64, 0},
343 {a_ME_CTES_QP_OFF, a_ME_CTES_QP_LEN, MIMEE_QP, 0},
344 {a_ME_CTES_BIN_OFF, a_ME_CTES_BIN_LEN, MIMEE_BIN, 0},
345 {0, 0, MIMEE_NONE, 0}
347 union {char const *s; size_t l;} u;
349 if(*hbody == '"')
350 for(u.s = ++hbody; *u.s != '\0' && *u.s != '"'; ++u.s)
352 else
353 for(u.s = hbody; *u.s != '\0' && !whitechar(*u.s); ++u.s)
355 u.l = PTR2SIZE(u.s - hbody);
357 for(cte = cte_base;;)
358 if(cte->len == u.l && !asccasecmp(&a_me_ctes[cte->off], hbody)){
359 rv = cte->enc;
360 break;
361 }else if((++cte)->enc == MIMEE_NONE){
362 rv = MIMEE_NONE;
363 break;
366 NYD2_LEAVE;
367 return rv;
370 FL char const *
371 mime_enc_from_conversion(enum conversion const convert){
372 char const *rv;
373 NYD2_ENTER;
375 switch(convert){
376 case CONV_7BIT: rv = &a_me_ctes[a_ME_CTES_7B_OFF]; break;
377 case CONV_8BIT: rv = &a_me_ctes[a_ME_CTES_8B_OFF]; break;
378 case CONV_TOQP: rv = &a_me_ctes[a_ME_CTES_QP_OFF]; break;
379 case CONV_TOB64: rv = &a_me_ctes[a_ME_CTES_B64_OFF]; break;
380 case CONV_NONE: rv = &a_me_ctes[a_ME_CTES_BIN_OFF]; break;
381 default: rv = n_empty; break;
383 NYD2_LEAVE;
384 return rv;
387 FL size_t
388 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags){
389 size_t rv;
390 bool_t sol;
391 NYD2_ENTER;
393 for(rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
394 switch(a_me_mustquote(ln, ln + lnlen, sol, flags)){
395 case a_ME_US:
396 case a_ME_EQ:
397 case a_ME_HT:
398 assert(flags & MIMEEF_ISENCWORD);
399 /* FALLTHRU */
400 case 0:
401 continue;
402 default:
403 ++rv;
405 NYD2_LEAVE;
406 return rv;
409 FL size_t
410 qp_encode_calc_size(size_t len){
411 size_t bytes, lines;
412 NYD2_ENTER;
414 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
415 * However, we must be aware that (a) the output may span multiple lines
416 * and (b) the input does not end with a newline itself (nonetheless):
417 * LC_ALL=C awk 'BEGIN{
418 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
419 * }' |
420 * s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
422 /* Several n_ERR_OVERFLOW */
423 if(len >= UIZ_MAX / 3){
424 len = UIZ_MAX;
425 goto jleave;
427 bytes = len * 3;
428 lines = bytes / QP_LINESIZE;
429 len += lines;
431 if(len >= UIZ_MAX / 3){
432 len = UIZ_MAX;
433 goto jleave;
435 /* Trailing hard NL may be missing, so there may be two lines.
436 * Thus add soft + hard NL per line and a trailing NUL */
437 bytes = len * 3;
438 lines = (bytes / QP_LINESIZE) + 1;
439 lines <<= 1;
440 ++bytes;
441 /*if(UIZ_MAX - bytes >= lines){
442 len = UIZ_MAX;
443 goto jleave;
445 bytes += lines;
446 len = bytes;
447 jleave:
448 NYD2_LEAVE;
449 return len;
452 #ifdef notyet
453 FL struct str *
454 qp_encode_cp(struct str *out, char const *cp, enum qpflags flags){
455 struct str in;
456 NYD_ENTER;
458 in.s = n_UNCONST(cp);
459 in.l = strlen(cp);
460 out = qp_encode(out, &in, flags);
461 NYD_LEAVE;
462 return out;
465 FL struct str *
466 qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
467 enum qpflags flags){
468 struct str in;
469 NYD_ENTER;
471 in.s = n_UNCONST(vp);
472 in.l = vp_len;
473 out = qp_encode(out, &in, flags);
474 NYD_LEAVE;
475 return out;
477 #endif /* notyet */
479 FL struct str *
480 qp_encode(struct str *out, struct str const *in, enum qpflags flags){
481 size_t lnlen;
482 char *qp;
483 char const *is, *ie;
484 bool_t sol, seenx;
485 NYD_ENTER;
487 sol = (flags & QP_ISHEAD ? FAL0 : TRU1);
489 if(!(flags & QP_BUF)){
490 if((lnlen = qp_encode_calc_size(in->l)) == UIZ_MAX){
491 out = NULL;
492 goto jerr;
494 out->s = (flags & QP_SALLOC) ? n_autorec_alloc(lnlen)
495 : n_realloc(out->s, lnlen);
497 qp = out->s;
498 is = in->s;
499 ie = is + in->l;
501 if(flags & QP_ISHEAD){
502 enum mime_enc_flags ef;
504 ef = MIMEEF_ISHEAD | (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
506 for(seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp){
507 char c;
508 enum a_me_qact mq;
510 mq = a_me_mustquote(is, ie, sol, ef);
511 c = *is++;
513 if(mq == a_ME_N){
514 /* We convert into a single *encoded-word*, that'll end up in
515 * =?C?Q??=; quote '?' from when we're inside there on */
516 if(seenx && c == '?')
517 goto jheadq;
518 *qp = c;
519 }else if(mq == a_ME_US)
520 *qp = a_ME_US;
521 else{
522 seenx = TRU1;
523 jheadq:
524 *qp++ = '=';
525 qp = n_c_to_hex_base16(qp, c) + 1;
528 goto jleave;
531 /* The body needs to take care for soft line breaks etc. */
532 for(lnlen = 0, seenx = FAL0; is < ie; sol = FAL0){
533 char c;
534 enum a_me_qact mq;
536 mq = a_me_mustquote(is, ie, sol, MIMEEF_NONE);
537 c = *is++;
539 if(mq == a_ME_N && (c != '\n' || !seenx)){
540 *qp++ = c;
541 if(++lnlen < QP_LINESIZE - 1)
542 continue;
543 /* Don't write a soft line break when we're in the last possible
544 * column and either an LF has been written or only an LF follows, as
545 * that'll end the line anyway */
546 /* XXX but - ensure is+1>=ie, then??
547 * xxx and/or - what about resetting lnlen; that contra
548 * xxx dicts input==1 input line assertion, though */
549 if(c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
550 continue;
551 jsoftnl:
552 qp[0] = '=';
553 qp[1] = '\n';
554 qp += 2;
555 lnlen = 0;
556 continue;
559 if(lnlen > QP_LINESIZE - 3 - 1){
560 qp[0] = '=';
561 qp[1] = '\n';
562 qp += 2;
563 lnlen = 0;
565 *qp++ = '=';
566 qp = n_c_to_hex_base16(qp, c);
567 qp += 2;
568 lnlen += 3;
569 if(c != '\n' || !seenx)
570 seenx = (c == '\r');
571 else{
572 seenx = FAL0;
573 goto jsoftnl;
577 /* Enforce soft line break if we haven't seen LF */
578 if(in->l > 0 && *--is != '\n'){
579 qp[0] = '=';
580 qp[1] = '\n';
581 qp += 2;
583 jleave:
584 out->l = PTR2SIZE(qp - out->s);
585 out->s[out->l] = '\0';
586 jerr:
587 NYD_LEAVE;
588 return out;
591 FL bool_t
592 qp_decode_header(struct str *out, struct str const *in){
593 struct n_string s;
594 char const *is, *ie;
595 NYD_ENTER;
597 /* n_ERR_OVERFLOW */
598 if(UIZ_MAX -1 - out->l <= in->l ||
599 SI32_MAX <= out->l + in->l){ /* XXX wrong, we may replace */
600 out->l = 0;
601 out = NULL;
602 goto jleave;
605 n_string_creat(&s);
606 n_string_reserve(n_string_take_ownership(&s, out->s,
607 (out->l == 0 ? 0 : out->l +1), out->l),
608 in->l + (in->l >> 2));
610 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
611 si32_t c;
613 c = *is++;
614 if(c == '='){
615 if(is >= ie){
616 goto jpushc; /* TODO According to RFC 2045, 6.7,
617 * ++is; TODO we should warn the user, but have no context
618 * goto jehead; TODO to do so; can't over and over */
619 }else if((c = n_c_from_hex_base16(is)) >= 0){
620 is += 2;
621 goto jpushc;
622 }else{
623 /* Invalid according to RFC 2045, section 6.7 */
624 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
625 c = '=';
626 goto jpushc;
627 /* TODO jehead:
628 * TODO if(n_psonce & n_PSO_UNICODE)
629 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
630 * TODO else{
631 * TODO c = '?';
632 * TODO goto jpushc;
633 * TODO }*/
635 }else{
636 jpushc:
637 if(c == '_' /* a_ME_US */)
638 c = ' ';
639 n_string_push_c(&s, (char)c);
643 out->s = n_string_cp(&s);
644 out->l = s.s_len;
645 n_string_gut(n_string_drop_ownership(&s));
646 jleave:
647 NYD_LEAVE;
648 return (out != NULL);
651 FL bool_t
652 qp_decode_part(struct str *out, struct str const *in, struct str *outrest,
653 struct str *inrest_or_null){
654 struct n_string s, *sp;
655 char const *is, *ie;
656 NYD_ENTER;
658 if(outrest->l != 0){
659 is = out->s;
660 *out = *outrest;
661 outrest->s = n_UNCONST(is);
662 outrest->l = 0;
665 /* n_ERR_OVERFLOW */
666 if(UIZ_MAX -1 - out->l <= in->l ||
667 SI32_MAX <= out->l + in->l) /* XXX wrong, we may replace */
668 goto jerr;
670 sp = n_string_creat(&s);
671 sp = n_string_take_ownership(sp, out->s,
672 (out->l == 0 ? 0 : out->l +1), out->l);
673 sp = n_string_reserve(sp, in->l + (in->l >> 2));
675 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
676 si32_t c;
678 if((c = *is++) != '='){
679 jpushc:
680 n_string_push_c(sp, (char)c);
681 continue;
684 /* RFC 2045, 6.7:
685 * Therefore, when decoding a Quoted-Printable body, any
686 * trailing white space on a line must be deleted, as it will
687 * necessarily have been added by intermediate transport
688 * agents */
689 for(; is <= ie && blankchar(*is); ++is)
691 if(is >= ie){
692 /* Soft line break? */
693 if(*is == '\n')
694 goto jsoftnl;
695 goto jpushc; /* TODO According to RFC 2045, 6.7,
696 * ++is; TODO we should warn the user, but have no context
697 * goto jebody; TODO to do so; can't over and over */
700 /* Not a soft line break? */
701 if(*is != '\n'){
702 if((c = n_c_from_hex_base16(is)) >= 0){
703 is += 2;
704 goto jpushc;
706 /* Invalid according to RFC 2045, section 6.7 */
707 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
708 c = '=';
709 goto jpushc;
710 /* TODO jebody:
711 * TODO if(n_psonce & n_PSO_UNICODE)
712 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
713 * TODO else{
714 * TODO c = '?';
715 * TODO goto jpushc;
716 * TODO }*/
719 /* CRLF line endings are encoded as QP, followed by a soft line break, so
720 * check for this special case, and simply forget we have seen one, so as
721 * not to end up with the entire DOS file in a contiguous buffer */
722 jsoftnl:
723 if(sp->s_len > 0 && sp->s_dat[sp->s_len - 1] == '\n'){
724 #if 0 /* TODO qp_decode_part() we do not normalize CRLF
725 * TODO to LF because for that we would need
726 * TODO to know if we are about to write to
727 * TODO the display or do save the file!
728 * TODO 'hope the MIME/send layer rewrite will
729 * TODO offer the possibility to DTRT */
730 if(sp->s_len > 1 && sp->s_dat[sp->s_len - 2] == '\r')
731 n_string_push_c(n_string_trunc(sp, sp->s_len - 2), '\n');
732 #endif
733 break;
736 /* C99 */{
737 char *cp;
738 size_t l;
740 if((l = PTR2SIZE(ie - is)) > 0){
741 if(inrest_or_null == NULL)
742 goto jerr;
743 n_str_assign_buf(inrest_or_null, is, l);
745 cp = outrest->s;
746 outrest->s = n_string_cp(sp);
747 outrest->l = s.s_len;
748 n_string_drop_ownership(sp);
749 if(cp != NULL)
750 n_free(cp);
752 break;
755 out->s = n_string_cp(sp);
756 out->l = sp->s_len;
757 n_string_gut(n_string_drop_ownership(sp));
758 jleave:
759 NYD_LEAVE;
760 return (out != NULL);
761 jerr:
762 out->l = 0;
763 out = NULL;
764 goto jleave;
767 FL size_t
768 b64_encode_calc_size(size_t len){
769 NYD2_ENTER;
770 if(len >= UIZ_MAX / 4)
771 len = UIZ_MAX;
772 else{
773 len = (len * 4) / 3;
774 len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
775 len += 2 + 1; /* CRLF, \0 */
777 NYD2_LEAVE;
778 return len;
781 FL struct str *
782 b64_encode(struct str *out, struct str const *in, enum b64flags flags){
783 ui8_t const *p;
784 size_t i, lnlen;
785 char *b64;
786 NYD_ENTER;
788 assert(!(flags & B64_NOPAD) ||
789 !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
791 p = (ui8_t const*)in->s;
793 if(!(flags & B64_BUF)){
794 if((i = b64_encode_calc_size(in->l)) == UIZ_MAX){
795 out = NULL;
796 goto jleave;
798 out->s = (flags & B64_SALLOC) ? n_autorec_alloc(i)
799 : n_realloc(out->s, i);
801 b64 = out->s;
803 if(!(flags & (B64_CRLF | B64_LF)))
804 flags &= ~B64_MULTILINE;
806 for(lnlen = 0, i = in->l; (ssize_t)i > 0; p += 3, i -= 3){
807 ui32_t a, b, c;
809 a = p[0];
810 b64[0] = a_me_b64_enctbl[a >> 2];
812 switch(i){
813 case 1:
814 b64[1] = a_me_b64_enctbl[((a & 0x3) << 4)];
815 b64[2] =
816 b64[3] = '=';
817 break;
818 case 2:
819 b = p[1];
820 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
821 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2)];
822 b64[3] = '=';
823 break;
824 default:
825 b = p[1];
826 c = p[2];
827 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
828 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
829 b64[3] = a_me_b64_enctbl[c & 0x3F];
830 break;
833 b64 += 4;
834 if(!(flags & B64_MULTILINE))
835 continue;
836 lnlen += 4;
837 if(lnlen < B64_LINESIZE)
838 continue;
840 lnlen = 0;
841 if(flags & B64_CRLF)
842 *b64++ = '\r';
843 if(flags & (B64_CRLF | B64_LF))
844 *b64++ = '\n';
847 if((flags & (B64_CRLF | B64_LF)) &&
848 (!(flags & B64_MULTILINE) || lnlen != 0)){
849 if(flags & B64_CRLF)
850 *b64++ = '\r';
851 if(flags & (B64_CRLF | B64_LF))
852 *b64++ = '\n';
853 }else if(flags & B64_NOPAD)
854 while(b64 != out->s && b64[-1] == '=')
855 --b64;
857 out->l = PTR2SIZE(b64 - out->s);
858 out->s[out->l] = '\0';
860 /* Base64 includes + and /, replace them with _ and -.
861 * This is base64url according to RFC 4648, then. Since we only support
862 * that for encoding and it is only used for boundary strings, this is
863 * yet a primitive implementation; xxx use tables; support decoding */
864 if(flags & B64_RFC4648URL){
865 char c;
867 for(b64 = out->s; (c = *b64) != '\0'; ++b64)
868 if(c == '+')
869 *b64 = '-';
870 else if(c == '/')
871 *b64 = '_';
873 jleave:
874 NYD_LEAVE;
875 return out;
878 FL struct str *
879 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
880 enum b64flags flags){
881 struct str in;
882 NYD_ENTER;
884 in.s = n_UNCONST(vp);
885 in.l = vp_len;
886 out = b64_encode(out, &in, flags);
887 NYD_LEAVE;
888 return out;
#ifdef notyet
/* b64_encode() for the NUL terminated string cp */
FL struct str *
b64_encode_cp(struct str *out, char const *cp, enum b64flags flags){
   struct str src;
   NYD_ENTER;

   src.l = strlen(cp);
   src.s = n_UNCONST(cp);

   out = b64_encode(out, &src, flags);
   NYD_LEAVE;
   return out;
}
#endif /* notyet */
905 FL bool_t
906 b64_decode(struct str *out, struct str const *in){
907 struct str work;
908 size_t len;
909 NYD_ENTER;
911 out->l = 0;
913 if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
914 goto jerr;
916 /* Ignore an empty input, as may happen for an empty final line */
917 if(work.l == 0)
918 out->s = n_realloc(out->s, 1);
919 else if(work.l >= 4 && !(work.l & 3)){
920 out->s = n_realloc(out->s, len +1);
921 if((ssize_t)(len = a_me_b64_decode(out, &work)) < 0)
922 goto jerr;
923 }else
924 goto jerr;
925 out->s[out->l] = '\0';
926 jleave:
927 NYD_LEAVE;
928 return (out != NULL);
929 jerr:
930 out = NULL;
931 goto jleave;
934 FL bool_t
935 b64_decode_header(struct str *out, struct str const *in){
936 struct str outr, inr;
937 NYD_ENTER;
939 if(!b64_decode(out, in)){
940 memset(&outr, 0, sizeof outr);
941 memset(&inr, 0, sizeof inr);
943 if(!b64_decode_part(out, in, &outr, &inr) || outr.l > 0 || inr.l > 0)
944 out = NULL;
946 if(inr.s != NULL)
947 n_free(inr.s);
948 if(outr.s != NULL)
949 n_free(outr.s);
951 NYD_LEAVE;
952 return (out != NULL);
955 FL bool_t
956 b64_decode_part(struct str *out, struct str const *in, struct str *outrest,
957 struct str *inrest_or_null){
958 struct str work, save;
959 ui32_t a, b, c, b64l;
960 char ca, cb, cc, cx;
961 struct n_string s, workbuf;
962 size_t len;
963 NYD_ENTER;
965 n_string_creat(&s);
966 if((len = out->l) > 0 && out->s[len] == '\0')
967 (void)n_string_take_ownership(&s, out->s, len +1, len);
968 else{
969 if(len > 0)
970 n_string_push_buf(&s, out->s, len);
971 if(out->s != NULL)
972 n_free(out->s);
974 out->s = NULL, out->l = 0;
975 n_string_creat(&workbuf);
977 if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
978 goto jerr;
980 if(outrest->l > 0){
981 n_string_push_buf(&s, outrest->s, outrest->l);
982 outrest->l = 0;
985 /* n_ERR_OVERFLOW */
986 if(UIZ_MAX - len <= s.s_len ||
987 SI32_MAX <= len + s.s_len) /* XXX wrong, we may replace */
988 goto jerr;
990 if(work.l == 0)
991 goto jok;
993 /* This text decoder is extremely expensive, especially given that in all
994 * but _invalid_ cases it is not even needed! So try once to do the normal
995 * decoding, if that fails, go the hard way */
996 save = work;
997 out->s = n_string_resize(&s, len + (out->l = b64l = s.s_len))->s_dat;
999 if(work.l >= 4 && a_me_b64_decode(out, &work) >= 0){
1000 n_string_trunc(&s, out->l);
1001 if(work.l == 0)
1002 goto jok;
1005 n_string_trunc(&s, b64l);
1006 work = save;
1007 out->s = NULL, out->l = 0;
1009 /* TODO b64_decode_part() does not yet STOP if it sees padding, whereas
1010 * TODO OpenSSL and mutt simply bail on such stuff */
1011 n_UNINIT(ca, 0);
1012 n_UNINIT(cb, 0);
1013 n_UNINIT(cc, 0);
1014 for(b64l = 0;;){
1015 ui32_t x;
1017 x = a_ME_B64_DECUI8((ui8_t)(cx = *work.s));
1018 switch(b64l){
1019 case 0:
1020 if(x >= a_ME_B64_EQU)
1021 goto jrepl;
1022 ca = cx;
1023 a = x;
1024 ++b64l;
1025 break;
1026 case 1:
1027 if(x >= a_ME_B64_EQU)
1028 goto jrepl;
1029 cb = cx;
1030 b = x;
1031 ++b64l;
1032 break;
1033 case 2:
1034 if(x == a_ME_B64_BAD)
1035 goto jrepl;
1036 cc = cx;
1037 c = x;
1038 ++b64l;
1039 break;
1040 case 3:
1041 if(x == a_ME_B64_BAD){
1042 jrepl:
1043 /* TODO This would be wrong since iconv(3) may be applied first! */
1044 #if 0
1045 if(n_psonce & n_PSO_UNICODE)
1046 n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
1047 else
1048 n_string_push_c(&s, '?');
1049 #endif
1051 }else if(c == a_ME_B64_EQU && x != a_ME_B64_EQU){
1052 /* This is not only invalid but bogus. Skip it over! */
1053 /* TODO This would be wrong since iconv(3) may be applied first! */
1054 #if 0
1055 n_string_push_buf(&s, n_UNIREPL n_UNIREPL n_UNIREPL n_UNIREPL,
1056 (sizeof(n_UNIREPL) -1) * 4);
1057 #endif
1058 b64l = 0;
1059 }else{
1060 ui8_t pb;
1062 pb = ((a << 2) | ((b & 0x30) >> 4));
1063 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1064 n_string_push_c(&s, (char)pb);
1065 pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
1066 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1067 n_string_push_c(&s, (char)pb);
1068 if(x != a_ME_B64_EQU){
1069 pb = (((c & 0x03) << 6) | x);
1070 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1071 n_string_push_c(&s, (char)pb);
1073 ++b64l;
1075 break;
1078 ++work.s;
1079 if(--work.l == 0){
1080 if(b64l > 0 && b64l != 4){
1081 if(inrest_or_null == NULL)
1082 goto jerr;
1083 inrest_or_null->s = n_realloc(inrest_or_null->s, b64l +1);
1084 inrest_or_null->s[0] = ca;
1085 if(b64l > 1)
1086 inrest_or_null->s[1] = cb;
1087 if(b64l > 2)
1088 inrest_or_null->s[2] = cc;
1089 inrest_or_null->s[inrest_or_null->l = b64l] = '\0';
1091 goto jok;
1093 if(b64l == 4)
1094 b64l = 0;
1097 jok:
1098 out->s = n_string_cp(&s);
1099 out->l = s.s_len;
1100 n_string_drop_ownership(&s);
1101 jleave:
1102 n_string_gut(&workbuf);
1103 n_string_gut(&s);
1104 NYD_LEAVE;
1105 return (out != NULL);
1106 jerr:
1107 out = NULL;
1108 goto jleave;
1111 /* s-it-mode */