Change *on-compose-{enter,leave}* environment + fixes..
[s-mailx.git] / mime-enc.c
blob7462af5f30ac1f2e64eca319c186425c3052a711
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047;
3 *@ for _header() versions: including "encoded word" as of RFC 2049):
4 *@ - Quoted-Printable, section 6.7
5 *@ - Base64, section 6.8
6 *@ TODO We have no notion of a "current message context" and thus badly log.
7 *@ TODO This is not final yet, v15 will bring "filters".
9 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
10 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
13 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
15 * Copyright (c) 2006 The NetBSD Foundation, Inc.
16 * All rights reserved.
18 * This code is derived from software contributed to The NetBSD Foundation
19 * by Anon Ymous.
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions
23 * are met:
24 * 1. Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * 2. Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
30 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
31 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
34 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
42 #undef n_FILE
43 #define n_FILE mime_enc
45 #ifndef HAVE_AMALGAMATION
46 # include "nail.h"
47 #endif
/* Quoting decision for a single byte: the element type of the lookup tables
 * below and the result of a_me_mustquote().
 * Several members deliberately carry the character they describe as value */
enum a_me_qact{
   a_ME_N = 0,       /* Nothing to do, pass through */
   a_ME_Q = 1,       /* Must quote */
   a_ME_SP = 2,      /* sp */
   a_ME_XF = 3,      /* Special character 'F' - maybe quoted */
   a_ME_XD = 4,      /* Special character '.' - maybe quoted */
   a_ME_UU = 5,      /* In header, _ must be quoted in encoded word */
   a_ME_US = '_',    /* In header, ' ' must be quoted as _ in encoded word */
   a_ME_QM = '?',    /* In header, special character ? not always quoted */
   a_ME_EQ = '=',    /* In header, '=' must be quoted in encoded word */
   a_ME_HT = '\t',   /* Body HT=SP.  Head HT=HT, BUT quote in encoded word */
   a_ME_NL = 0,      /* Don't quote '\n' (NL) */
   a_ME_CR = a_ME_Q  /* Always quote a '\r' (CR) */
};
64 /* Lookup tables to decide whether a character must be encoded or not.
65 * Email header differences according to RFC 2047, section 4.2:
66 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
67 * - don't care about the special ^F[rom] and ^.$ */
68 static ui8_t const a_me_qp_body[] = {
69 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
70 a_ME_Q, a_ME_SP, a_ME_NL, a_ME_Q, a_ME_Q, a_ME_CR, a_ME_Q, a_ME_Q,
71 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
72 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
73 a_ME_SP, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
74 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XD, a_ME_N,
75 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
76 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q, a_ME_N, a_ME_N,
78 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XF, a_ME_N,
79 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
80 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
81 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
82 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
83 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
84 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
85 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
86 }, a_me_qp_head[] = {
87 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
88 a_ME_Q, a_ME_HT, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
89 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
90 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
91 a_ME_US, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
92 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
93 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
94 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_EQ, a_ME_N, a_ME_QM,
96 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
97 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
98 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
99 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_UU,
100 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
101 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
102 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
103 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
/* Base64 alphabet, indexed by the 6-bit value */
static char const a_me_b64_enctbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789"
      "+/";

/* Reverse mapping for the 7-bit range: the 6-bit value of an alphabet
 * character, -2 for the padding character '=', -1 for anything else.
 * The decoding table is only accessed via a_ME_B64_DECUI8() */
static signed char const a_me_b64__dectbl[] = {
   /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
   /* 0x30 */ 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
   /* 0x40 */ -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
   /* 0x50 */ 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
   /* 0x60 */ -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
   /* 0x70 */ 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* Results of a_ME_B64_DECUI8() for padding and for invalid input; being the
 * two largest ui32_t values, ">= a_ME_B64_EQU" tests for "either of them" */
#define a_ME_B64_EQU (ui32_t)-2
#define a_ME_B64_BAD (ui32_t)-1
#define a_ME_B64_DECUI8(C) \
   ((ui8_t)(C) >= sizeof(a_me_b64__dectbl)\
    ? a_ME_B64_BAD : (ui32_t)a_me_b64__dectbl[(ui8_t)(C)])
/* All Content-Transfer-Encoding names in one NUL-separated string; the enum
 * below records the start offset and the length of each name.
 * (Ugly to place an enum here) */
static char const a_me_ctes[] =
      "7bit\0"
      "8bit\0"
      "base64\0"
      "quoted-printable\0"
      "binary\0"
      /* abbrevs */
      "8b\0"
      "b64\0"
      "qp\0";

enum a_me_ctes_off{
   a_ME_CTES_7B_OFF = 0,
   a_ME_CTES_7B_LEN = 4,
   a_ME_CTES_8B_OFF = 5,
   a_ME_CTES_8B_LEN = 4,
   a_ME_CTES_B64_OFF = 10,
   a_ME_CTES_B64_LEN = 6,
   a_ME_CTES_QP_OFF = 17,
   a_ME_CTES_QP_LEN = 16,
   a_ME_CTES_BIN_OFF = 34,
   a_ME_CTES_BIN_LEN = 6,

   /* The abbreviations */
   a_ME_CTES_S8B_OFF = 41,
   a_ME_CTES_S8B_LEN = 2,
   a_ME_CTES_SB64_OFF = 44,
   a_ME_CTES_SB64_LEN = 3,
   a_ME_CTES_SQP_OFF = 48,
   a_ME_CTES_SQP_LEN = 2
};
141 /* Check whether *s must be quoted according to flags, else body rules;
142 * sol indicates whether we are at the first character of a line/field */
143 SINLINE enum a_me_qact a_me_mustquote(char const *s, char const *e, bool_t sol,
144 enum mime_enc_flags flags);
146 /* Trim WS and make work point to the decodable range of in.
147 * Return the amount of bytes a b64_decode operation on that buffer requires,
148 * or UIZ_MAX on overflow error */
149 static size_t a_me_b64_decode_prepare(struct str *work, struct str const *in);
151 /* Perform b64_decode on in(put) to sufficiently spaced out(put).
152 * Return number of useful bytes in out or -1 on error.
153 * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
154 static ssize_t a_me_b64_decode(struct str *out, struct str *in);
156 SINLINE enum a_me_qact
157 a_me_mustquote(char const *s, char const *e, bool_t sol,
158 enum mime_enc_flags flags){
159 ui8_t const *qtab;
160 enum a_me_qact a, r;
161 NYD2_ENTER;
163 qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
164 ? a_me_qp_head : a_me_qp_body;
166 if((ui8_t)*s > 0x7F){
167 r = a_ME_Q;
168 goto jleave;
171 a = qtab[(ui8_t)*s];
173 if((r = a) == a_ME_N || a == a_ME_Q)
174 goto jleave;
176 r = a_ME_Q;
178 /* Special header fields */
179 if(flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)){
180 /* Special massage for encoded words */
181 if(flags & MIMEEF_ISENCWORD){
182 switch(a){
183 case a_ME_HT:
184 case a_ME_US:
185 case a_ME_EQ:
186 r = a;
187 /* FALLTHRU */
188 case a_ME_UU:
189 goto jleave;
190 default:
191 break;
195 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
196 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
197 * should be hard to match */
198 if(a == a_ME_QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
199 goto jleave;
200 goto jnquote;
203 /* Body-only */
205 if(a == a_ME_SP){
206 /* WS only if trailing white space */
207 if(&s[1] == e || s[1] == '\n')
208 goto jleave;
209 goto jnquote;
212 /* Rest are special begin-of-line cases */
213 if(!sol)
214 goto jnquote;
216 /* ^From */
217 if(a == a_ME_XF){
218 if(&s[4] < e && s[1] == 'r' && s[2] == 'o' && s[3] == 'm' && s[4] == ' ')
219 goto jleave;
220 goto jnquote;
222 /* ^.$ */
223 if(a == a_ME_XD && (&s[1] == e || s[1] == '\n'))
224 goto jleave;
225 jnquote:
226 r = 0;
227 jleave:
228 NYD2_LEAVE;
229 return r;
232 static size_t
233 a_me_b64_decode_prepare(struct str *work, struct str const *in){
234 size_t cp_len;
235 NYD2_ENTER;
237 *work = *in;
238 cp_len = n_str_trim(work)->l;
240 if(cp_len > 16){
241 /* n_ERR_OVERFLOW */
242 if(UIZ_MAX / 3 <= cp_len){
243 cp_len = UIZ_MAX;
244 goto jleave;
246 cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
248 cp_len += (2 * 3) +1;
249 jleave:
250 NYD2_LEAVE;
251 return cp_len;
254 static ssize_t
255 a_me_b64_decode(struct str *out, struct str *in){
256 ui8_t *p;
257 ui8_t const *q, *end;
258 ssize_t rv;
259 NYD2_ENTER;
261 rv = -1;
262 p = (ui8_t*)&out->s[out->l];
263 q = (ui8_t const*)in->s;
265 for(end = &q[in->l]; PTR2SIZE(end - q) >= 4; q += 4){
266 ui32_t a, b, c, d;
268 a = a_ME_B64_DECUI8(q[0]);
269 b = a_ME_B64_DECUI8(q[1]);
270 c = a_ME_B64_DECUI8(q[2]);
271 d = a_ME_B64_DECUI8(q[3]);
273 if(n_UNLIKELY(a >= a_ME_B64_EQU || b >= a_ME_B64_EQU ||
274 c == a_ME_B64_BAD || d == a_ME_B64_BAD))
275 goto jleave;
277 *p++ = ((a << 2) | ((b & 0x30) >> 4));
278 if(c == a_ME_B64_EQU){ /* got '=' */
279 q += 4;
280 if(n_UNLIKELY(d != a_ME_B64_EQU))
281 goto jleave;
282 break;
285 *p++ = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
286 if(d == a_ME_B64_EQU) /* got '=' */
287 break;
288 *p++ = (((c & 0x03) << 6) | d);
290 rv ^= rv;
292 jleave:{
293 size_t i;
295 i = PTR2SIZE((char*)p - out->s);
296 out->l = i;
297 if(rv == 0)
298 rv = (ssize_t)i;
300 in->l -= PTR2SIZE(q - (ui8_t*)in->s);
301 in->s = n_UNCONST(q);
302 NYD2_LEAVE;
303 return rv;
306 FL enum mime_enc
307 mime_enc_target(void){
308 char const *cp;
309 enum mime_enc rv;
310 NYD2_ENTER;
312 if((cp = ok_vlook(encoding)) == NULL)
313 rv = MIME_DEFAULT_ENCODING;
314 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_S8B_OFF]) ||
315 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_8B_OFF]))
316 rv = MIMEE_8B;
317 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SB64_OFF]) ||
318 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_B64_OFF]))
319 rv = MIMEE_B64;
320 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SQP_OFF]) ||
321 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_QP_OFF]))
322 rv = MIMEE_QP;
323 else{
324 n_err(_("Warning: invalid *encoding*, using Base64: %s\n"), cp);
325 rv = MIMEE_B64;
327 NYD2_LEAVE;
328 return rv;
331 FL enum mime_enc
332 mime_enc_from_ctehead(char const *hbody){
333 enum mime_enc rv;
334 NYD2_ENTER;
336 if(hbody == NULL)
337 rv = MIMEE_7B;
338 else{
339 struct{
340 ui8_t off;
341 ui8_t len;
342 ui8_t enc;
343 ui8_t __dummy;
344 } const *cte, cte_base[] = {
345 {a_ME_CTES_7B_OFF, a_ME_CTES_7B_LEN, MIMEE_7B, 0},
346 {a_ME_CTES_8B_OFF, a_ME_CTES_8B_LEN, MIMEE_8B, 0},
347 {a_ME_CTES_B64_OFF, a_ME_CTES_B64_LEN, MIMEE_B64, 0},
348 {a_ME_CTES_QP_OFF, a_ME_CTES_QP_LEN, MIMEE_QP, 0},
349 {a_ME_CTES_BIN_OFF, a_ME_CTES_BIN_LEN, MIMEE_BIN, 0},
350 {0, 0, MIMEE_NONE, 0}
352 union {char const *s; size_t l;} u;
354 if(*hbody == '"')
355 for(u.s = ++hbody; *u.s != '\0' && *u.s != '"'; ++u.s)
357 else
358 for(u.s = hbody; *u.s != '\0' && !whitechar(*u.s); ++u.s)
360 u.l = PTR2SIZE(u.s - hbody);
362 for(cte = cte_base;;)
363 if(cte->len == u.l && !asccasecmp(&a_me_ctes[cte->off], hbody)){
364 rv = cte->enc;
365 break;
366 }else if((++cte)->enc == MIMEE_NONE){
367 rv = MIMEE_NONE;
368 break;
371 NYD2_LEAVE;
372 return rv;
375 FL char const *
376 mime_enc_from_conversion(enum conversion const convert){
377 char const *rv;
378 NYD2_ENTER;
380 switch(convert){
381 case CONV_7BIT: rv = &a_me_ctes[a_ME_CTES_7B_OFF]; break;
382 case CONV_8BIT: rv = &a_me_ctes[a_ME_CTES_8B_OFF]; break;
383 case CONV_TOQP: rv = &a_me_ctes[a_ME_CTES_QP_OFF]; break;
384 case CONV_TOB64: rv = &a_me_ctes[a_ME_CTES_B64_OFF]; break;
385 case CONV_NONE: rv = &a_me_ctes[a_ME_CTES_BIN_OFF]; break;
386 default: rv = n_empty; break;
388 NYD2_LEAVE;
389 return rv;
392 FL size_t
393 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags){
394 size_t rv;
395 bool_t sol;
396 NYD2_ENTER;
398 for(rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
399 switch(a_me_mustquote(ln, ln + lnlen, sol, flags)){
400 case a_ME_US:
401 case a_ME_EQ:
402 case a_ME_HT:
403 assert(flags & MIMEEF_ISENCWORD);
404 /* FALLTHRU */
405 case 0:
406 continue;
407 default:
408 ++rv;
410 NYD2_LEAVE;
411 return rv;
414 FL size_t
415 qp_encode_calc_size(size_t len){
416 size_t bytes, lines;
417 NYD2_ENTER;
419 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
420 * However, we must be aware that (a) the output may span multiple lines
421 * and (b) the input does not end with a newline itself (nonetheless):
422 * LC_ALL=C awk 'BEGIN{
423 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
424 * }' |
425 * s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
427 /* Several n_ERR_OVERFLOW */
428 if(len >= UIZ_MAX / 3){
429 len = UIZ_MAX;
430 goto jleave;
432 bytes = len * 3;
433 lines = bytes / QP_LINESIZE;
434 len += lines;
436 if(len >= UIZ_MAX / 3){
437 len = UIZ_MAX;
438 goto jleave;
440 /* Trailing hard NL may be missing, so there may be two lines.
441 * Thus add soft + hard NL per line and a trailing NUL */
442 bytes = len * 3;
443 lines = (bytes / QP_LINESIZE) + 1;
444 lines <<= 1;
445 ++bytes;
446 /*if(UIZ_MAX - bytes >= lines){
447 len = UIZ_MAX;
448 goto jleave;
450 bytes += lines;
451 len = bytes;
452 jleave:
453 NYD2_LEAVE;
454 return len;
#ifdef notyet
/* qp_encode() for the NUL terminated string cp */
FL struct str *
qp_encode_cp(struct str *out, char const *cp, enum qpflags flags){
   struct str src;
   NYD_ENTER;

   src.l = strlen(cp);
   src.s = n_UNCONST(cp);
   out = qp_encode(out, &src, flags);

   NYD_LEAVE;
   return out;
}

/* qp_encode() for the vp_len bytes at vp */
FL struct str *
qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
      enum qpflags flags){
   struct str src;
   NYD_ENTER;

   src.l = vp_len;
   src.s = n_UNCONST(vp);
   out = qp_encode(out, &src, flags);

   NYD_LEAVE;
   return out;
}
#endif /* notyet */
484 FL struct str *
485 qp_encode(struct str *out, struct str const *in, enum qpflags flags){
486 size_t lnlen;
487 char *qp;
488 char const *is, *ie;
489 bool_t sol, seenx;
490 NYD_ENTER;
492 sol = (flags & QP_ISHEAD ? FAL0 : TRU1);
494 if(!(flags & QP_BUF)){
495 if((lnlen = qp_encode_calc_size(in->l)) == UIZ_MAX){
496 out = NULL;
497 goto jerr;
499 out->s = (flags & QP_SALLOC) ? salloc(lnlen) : srealloc(out->s, lnlen);
501 qp = out->s;
502 is = in->s;
503 ie = is + in->l;
505 if(flags & QP_ISHEAD){
506 enum mime_enc_flags ef;
508 ef = MIMEEF_ISHEAD | (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
510 for(seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp){
511 char c;
512 enum a_me_qact mq;
514 mq = a_me_mustquote(is, ie, sol, ef);
515 c = *is++;
517 if(mq == a_ME_N){
518 /* We convert into a single *encoded-word*, that'll end up in
519 * =?C?Q??=; quote '?' from when we're inside there on */
520 if(seenx && c == '?')
521 goto jheadq;
522 *qp = c;
523 }else if(mq == a_ME_US)
524 *qp = a_ME_US;
525 else{
526 seenx = TRU1;
527 jheadq:
528 *qp++ = '=';
529 qp = n_c_to_hex_base16(qp, c) + 1;
532 goto jleave;
535 /* The body needs to take care for soft line breaks etc. */
536 for(lnlen = 0, seenx = FAL0; is < ie; sol = FAL0){
537 char c;
538 enum a_me_qact mq;
540 mq = a_me_mustquote(is, ie, sol, MIMEEF_NONE);
541 c = *is++;
543 if(mq == a_ME_N && (c != '\n' || !seenx)){
544 *qp++ = c;
545 if(++lnlen < QP_LINESIZE - 1)
546 continue;
547 /* Don't write a soft line break when we're in the last possible
548 * column and either an LF has been written or only an LF follows, as
549 * that'll end the line anyway */
550 /* XXX but - ensure is+1>=ie, then??
551 * xxx and/or - what about resetting lnlen; that contra
552 * xxx dicts input==1 input line assertion, though */
553 if(c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
554 continue;
555 jsoftnl:
556 qp[0] = '=';
557 qp[1] = '\n';
558 qp += 2;
559 lnlen = 0;
560 continue;
563 if(lnlen > QP_LINESIZE - 3 - 1){
564 qp[0] = '=';
565 qp[1] = '\n';
566 qp += 2;
567 lnlen = 0;
569 *qp++ = '=';
570 qp = n_c_to_hex_base16(qp, c);
571 qp += 2;
572 lnlen += 3;
573 if(c != '\n' || !seenx)
574 seenx = (c == '\r');
575 else{
576 seenx = FAL0;
577 goto jsoftnl;
581 /* Enforce soft line break if we haven't seen LF */
582 if(in->l > 0 && *--is != '\n'){
583 qp[0] = '=';
584 qp[1] = '\n';
585 qp += 2;
587 jleave:
588 out->l = PTR2SIZE(qp - out->s);
589 out->s[out->l] = '\0';
590 jerr:
591 NYD_LEAVE;
592 return out;
595 FL bool_t
596 qp_decode_header(struct str *out, struct str const *in){
597 struct n_string s;
598 char const *is, *ie;
599 NYD_ENTER;
601 /* n_ERR_OVERFLOW */
602 if(UIZ_MAX -1 - out->l <= in->l ||
603 SI32_MAX <= out->l + in->l){ /* XXX wrong, we may replace */
604 out->l = 0;
605 out = NULL;
606 goto jleave;
609 n_string_creat(&s);
610 n_string_reserve(n_string_take_ownership(&s, out->s,
611 (out->l == 0 ? 0 : out->l +1), out->l),
612 in->l + (in->l >> 2));
614 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
615 si32_t c;
617 c = *is++;
618 if(c == '='){
619 if(is >= ie){
620 goto jpushc; /* TODO According to RFC 2045, 6.7,
621 * ++is; TODO we should warn the user, but have no context
622 * goto jehead; TODO to do so; can't over and over */
623 }else if((c = n_c_from_hex_base16(is)) >= 0){
624 is += 2;
625 goto jpushc;
626 }else{
627 /* Invalid according to RFC 2045, section 6.7 */
628 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
629 c = '=';
630 goto jpushc;
631 /* TODO jehead:
632 * TODO if(n_psonce & n_PSO_UNICODE)
633 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
634 * TODO else{
635 * TODO c = '?';
636 * TODO goto jpushc;
637 * TODO }*/
639 }else{
640 jpushc:
641 if(c == '_' /* a_ME_US */)
642 c = ' ';
643 n_string_push_c(&s, (char)c);
647 out->s = n_string_cp(&s);
648 out->l = s.s_len;
649 n_string_gut(n_string_drop_ownership(&s));
650 jleave:
651 NYD_LEAVE;
652 return (out != NULL);
655 FL bool_t
656 qp_decode_part(struct str *out, struct str const *in, struct str *outrest,
657 struct str *inrest_or_null){
658 struct n_string s, *sp;
659 char const *is, *ie;
660 NYD_ENTER;
662 if(outrest->l != 0){
663 is = out->s;
664 *out = *outrest;
665 outrest->s = n_UNCONST(is);
666 outrest->l = 0;
669 /* n_ERR_OVERFLOW */
670 if(UIZ_MAX -1 - out->l <= in->l ||
671 SI32_MAX <= out->l + in->l) /* XXX wrong, we may replace */
672 goto jerr;
674 sp = n_string_creat(&s);
675 sp = n_string_take_ownership(sp, out->s,
676 (out->l == 0 ? 0 : out->l +1), out->l);
677 sp = n_string_reserve(sp, in->l + (in->l >> 2));
679 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
680 si32_t c;
682 if((c = *is++) != '='){
683 jpushc:
684 n_string_push_c(&s, (char)c);
685 continue;
688 /* RFC 2045, 6.7:
689 * Therefore, when decoding a Quoted-Printable body, any
690 * trailing white space on a line must be deleted, as it will
691 * necessarily have been added by intermediate transport
692 * agents */
693 for(; is <= ie && blankchar(*is); ++is)
695 if(is >= ie){
696 /* Soft line break? */
697 if(*is == '\n')
698 goto jsoftnl;
699 goto jpushc; /* TODO According to RFC 2045, 6.7,
700 * ++is; TODO we should warn the user, but have no context
701 * goto jebody; TODO to do so; can't over and over */
704 /* Not a soft line break? */
705 if(*is != '\n'){
706 if((c = n_c_from_hex_base16(is)) >= 0){
707 is += 2;
708 goto jpushc;
710 /* Invalid according to RFC 2045, section 6.7 */
711 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
712 c = '=';
713 goto jpushc;
714 /* TODO jebody:
715 * TODO if(n_psonce & n_PSO_UNICODE)
716 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
717 * TODO else{
718 * TODO c = '?';
719 * TODO goto jpushc;
720 * TODO }*/
723 /* CRLF line endings are encoded as QP, followed by a soft line break, so
724 * check for this special case, and simply forget we have seen one, so as
725 * not to end up with the entire DOS file in a contiguous buffer */
726 jsoftnl:
727 if(s.s_len > 0 && s.s_dat[s.s_len - 1] == '\n'){
728 #if 0 /* TODO qp_decode_part() we do not normalize CRLF
729 * TODO to LF because for that we would need
730 * TODO to know if we are about to write to
731 * TODO the display or do save the file!
732 * TODO 'hope the MIME/send layer rewrite will
733 * TODO offer the possibility to DTRT */
734 if(s.s_len > 1 && s.s_dat[s.s_len - 2] == '\r')
735 n_string_push_c(n_string_trunc(&s, s.s_len - 2), '\n');
736 #endif
737 break;
740 /* C99 */{
741 char *cp;
742 size_t l;
744 if((l = PTR2SIZE(ie - is)) > 0){
745 if(inrest_or_null == NULL)
746 goto jerr;
747 n_str_assign_buf(inrest_or_null, is, l);
749 cp = outrest->s;
750 outrest->s = n_string_cp(&s);
751 outrest->l = s.s_len;
752 n_string_drop_ownership(&s);
753 if(cp != NULL)
754 free(cp);
756 break;
759 out->s = n_string_cp(&s);
760 out->l = s.s_len;
761 n_string_gut(n_string_drop_ownership(&s));
762 jleave:
763 NYD_LEAVE;
764 return (out != NULL);
765 jerr:
766 out->l = 0;
767 out = NULL;
768 goto jleave;
771 FL size_t
772 b64_encode_calc_size(size_t len){
773 NYD2_ENTER;
774 if(len >= UIZ_MAX / 4)
775 len = UIZ_MAX;
776 else{
777 len = (len * 4) / 3;
778 len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
779 len += 2 + 1; /* CRLF, \0 */
781 NYD2_LEAVE;
782 return len;
785 FL struct str *
786 b64_encode(struct str *out, struct str const *in, enum b64flags flags){
787 ui8_t const *p;
788 size_t i, lnlen;
789 char *b64;
790 NYD_ENTER;
792 assert(!(flags & B64_NOPAD) ||
793 !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
795 p = (ui8_t const*)in->s;
797 if(!(flags & B64_BUF)){
798 if((i = b64_encode_calc_size(in->l)) == UIZ_MAX){
799 out = NULL;
800 goto jleave;
802 out->s = (flags & B64_SALLOC) ? salloc(i) : srealloc(out->s, i);
804 b64 = out->s;
806 if(!(flags & (B64_CRLF | B64_LF)))
807 flags &= ~B64_MULTILINE;
809 for(lnlen = 0, i = in->l; (ssize_t)i > 0; p += 3, i -= 3){
810 ui32_t a, b, c;
812 a = p[0];
813 b64[0] = a_me_b64_enctbl[a >> 2];
815 switch(i){
816 case 1:
817 b64[1] = a_me_b64_enctbl[((a & 0x3) << 4)];
818 b64[2] =
819 b64[3] = '=';
820 break;
821 case 2:
822 b = p[1];
823 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
824 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2)];
825 b64[3] = '=';
826 break;
827 default:
828 b = p[1];
829 c = p[2];
830 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
831 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
832 b64[3] = a_me_b64_enctbl[c & 0x3F];
833 break;
836 b64 += 4;
837 if(!(flags & B64_MULTILINE))
838 continue;
839 lnlen += 4;
840 if(lnlen < B64_LINESIZE)
841 continue;
843 lnlen = 0;
844 if(flags & B64_CRLF)
845 *b64++ = '\r';
846 if(flags & (B64_CRLF | B64_LF))
847 *b64++ = '\n';
850 if((flags & (B64_CRLF | B64_LF)) &&
851 (!(flags & B64_MULTILINE) || lnlen != 0)){
852 if(flags & B64_CRLF)
853 *b64++ = '\r';
854 if(flags & (B64_CRLF | B64_LF))
855 *b64++ = '\n';
856 }else if(flags & B64_NOPAD)
857 while(b64 != out->s && b64[-1] == '=')
858 --b64;
860 out->l = PTR2SIZE(b64 - out->s);
861 out->s[out->l] = '\0';
863 /* Base64 includes + and /, replace them with _ and -.
864 * This is base64url according to RFC 4648, then. Since we only support
865 * that for encoding and it is only used for boundary strings, this is
866 * yet a primitive implementation; xxx use tables; support decoding */
867 if(flags & B64_RFC4648URL){
868 char c;
870 for(b64 = out->s; (c = *b64) != '\0'; ++b64)
871 if(c == '+')
872 *b64 = '-';
873 else if(c == '/')
874 *b64 = '_';
876 jleave:
877 NYD_LEAVE;
878 return out;
881 FL struct str *
882 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
883 enum b64flags flags){
884 struct str in;
885 NYD_ENTER;
887 in.s = n_UNCONST(vp);
888 in.l = vp_len;
889 out = b64_encode(out, &in, flags);
890 NYD_LEAVE;
891 return out;
#ifdef notyet
/* b64_encode() for the NUL terminated string cp */
FL struct str *
b64_encode_cp(struct str *out, char const *cp, enum b64flags flags){
   struct str src;
   NYD_ENTER;

   src.l = strlen(cp);
   src.s = n_UNCONST(cp);
   out = b64_encode(out, &src, flags);

   NYD_LEAVE;
   return out;
}
#endif /* notyet */
908 FL bool_t
909 b64_decode(struct str *out, struct str const *in){
910 struct str work;
911 size_t len;
912 NYD_ENTER;
914 out->l = 0;
916 if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
917 goto jerr;
919 /* Ignore an empty input, as may happen for an empty final line */
920 if(work.l == 0)
921 out->s = srealloc(out->s, 1);
922 else if(work.l >= 4 && !(work.l & 3)){
923 out->s = srealloc(out->s, len +1);
924 if((ssize_t)(len = a_me_b64_decode(out, &work)) < 0)
925 goto jerr;
926 }else
927 goto jerr;
928 out->s[out->l] = '\0';
929 jleave:
930 NYD_LEAVE;
931 return (out != NULL);
932 jerr:
933 out = NULL;
934 goto jleave;
937 FL bool_t
938 b64_decode_header(struct str *out, struct str const *in){
939 struct str outr, inr;
940 NYD_ENTER;
942 if(!b64_decode(out, in)){
943 memset(&outr, 0, sizeof outr);
944 memset(&inr, 0, sizeof inr);
946 if(!b64_decode_part(out, in, &outr, &inr) || outr.l > 0 || inr.l > 0)
947 out = NULL;
949 if(inr.s != NULL)
950 free(inr.s);
951 if(outr.s != NULL)
952 free(outr.s);
954 NYD_LEAVE;
955 return (out != NULL);
958 FL bool_t
959 b64_decode_part(struct str *out, struct str const *in, struct str *outrest,
960 struct str *inrest_or_null){
961 struct str work, save;
962 ui32_t a, b, c, b64l;
963 char ca, cb, cc, cx;
964 struct n_string s, workbuf;
965 size_t len;
966 NYD_ENTER;
968 n_string_creat(&s);
969 if((len = out->l) > 0 && out->s[len] == '\0')
970 n_string_take_ownership(&s, out->s, len +1, len);
971 else{
972 if(len > 0)
973 n_string_push_buf(&s, out->s, len);
974 if(out->s != NULL)
975 free(out->s);
977 out->s = NULL, out->l = 0;
978 n_string_creat(&workbuf);
980 if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
981 goto jerr;
983 if(outrest->l > 0){
984 n_string_push_buf(&s, outrest->s, outrest->l);
985 outrest->l = 0;
988 /* n_ERR_OVERFLOW */
989 if(UIZ_MAX - len <= s.s_len ||
990 SI32_MAX <= len + s.s_len) /* XXX wrong, we may replace */
991 goto jerr;
993 if(work.l == 0)
994 goto jok;
996 /* This text decoder is extremely expensive, especially given that in all
997 * but _invalid_ cases it is not even needed! So try once to do the normal
998 * decoding, if that fails, go the hard way */
999 save = work;
1000 out->s = n_string_resize(&s, len + (out->l = b64l = s.s_len))->s_dat;
1002 if(work.l >= 4 && a_me_b64_decode(out, &work) >= 0){
1003 n_string_trunc(&s, out->l);
1004 if(work.l == 0)
1005 goto jok;
1008 n_string_trunc(&s, b64l);
1009 work = save;
1010 out->s = NULL, out->l = 0;
1012 n_UNINIT(ca, 0);
1013 n_UNINIT(cb, 0);
1014 n_UNINIT(cc, 0);
1015 for(b64l = 0;;){
1016 ui32_t x;
1018 x = a_ME_B64_DECUI8((ui8_t)(cx = *work.s));
1019 switch(b64l){
1020 case 0:
1021 if(x >= a_ME_B64_EQU)
1022 goto jrepl;
1023 ca = cx;
1024 a = x;
1025 ++b64l;
1026 break;
1027 case 1:
1028 if(x >= a_ME_B64_EQU)
1029 goto jrepl;
1030 cb = cx;
1031 b = x;
1032 ++b64l;
1033 break;
1034 case 2:
1035 if(x == a_ME_B64_BAD)
1036 goto jrepl;
1037 cc = cx;
1038 c = x;
1039 ++b64l;
1040 break;
1041 case 3:
1042 if(x == a_ME_B64_BAD){
1043 jrepl:
1044 /* TODO This would be wrong since iconv(3) may be applied first! */
1045 #if 0
1046 if(n_psonce & n_PSO_UNICODE)
1047 n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
1048 else
1049 n_string_push_c(&s, '?');
1050 #endif
1052 }else if(c == a_ME_B64_EQU && x != a_ME_B64_EQU){
1053 /* This is not only invalid but bogus. Skip it over! */
1054 /* TODO This would be wrong since iconv(3) may be applied first! */
1055 #if 0
1056 n_string_push_buf(&s, n_UNIREPL n_UNIREPL n_UNIREPL n_UNIREPL,
1057 (sizeof(n_UNIREPL) -1) * 4);
1058 #endif
1059 b64l = 0;
1060 }else{
1061 n_string_push_c(&s, (char)((a << 2) | ((b & 0x30) >> 4)));
1062 n_string_push_c(&s, (char)(((b & 0x0F) << 4) | ((c & 0x3C) >> 2)));
1063 if(x != a_ME_B64_EQU)
1064 n_string_push_c(&s, (char)(((c & 0x03) << 6) | x));
1065 ++b64l;
1067 break;
1070 ++work.s;
1071 if(--work.l == 0){
1072 if(b64l > 0 && b64l != 4){
1073 if(inrest_or_null == NULL)
1074 goto jerr;
1075 inrest_or_null->s = srealloc(inrest_or_null->s, b64l +1);
1076 inrest_or_null->s[0] = ca;
1077 if(b64l > 1)
1078 inrest_or_null->s[1] = cb;
1079 if(b64l > 2)
1080 inrest_or_null->s[2] = cc;
1081 inrest_or_null->s[inrest_or_null->l = b64l] = '\0';
1083 goto jok;
1085 if(b64l == 4)
1086 b64l = 0;
1089 jok:
1090 out->s = n_string_cp(&s);
1091 out->l = s.s_len;
1092 n_string_drop_ownership(&s);
1093 jleave:
1094 n_string_gut(&workbuf);
1095 n_string_gut(&s);
1096 NYD_LEAVE;
1097 return (out != NULL);
1098 jerr:
1099 out = NULL;
1100 goto jleave;
1103 /* s-it-mode */