Add n_hy[] ("-"), use it
[s-mailx.git] / mime-enc.c
blob3418d9dc90464353960ab45b21ca8c494298dcf4
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047;
3 *@ for _header() versions: including "encoded word" as of RFC 2047):
4 *@ - Quoted-Printable, section 6.7
5 *@ - Base64, section 6.8
6 *@ TODO We have no notion of a "current message context" and thus badly log.
7 *@ TODO This is not final yet, v15 will bring "filters".
9 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
10 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
13 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
15 * Copyright (c) 2006 The NetBSD Foundation, Inc.
16 * All rights reserved.
18 * This code is derived from software contributed to The NetBSD Foundation
19 * by Anon Ymous.
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions
23 * are met:
24 * 1. Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * 2. Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in the
28 * documentation and/or other materials provided with the distribution.
30 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
31 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
34 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
42 #undef n_FILE
43 #define n_FILE mime_enc
45 #ifndef HAVE_AMALGAMATION
46 # include "nail.h"
47 #endif
/* Quoting actions, as stored in the QP lookup tables and as returned by
 * a_me_mustquote().  Some members deliberately carry the value of the
 * character they describe */
enum a_me_qact{
   a_ME_N = 0,          /* No quoting necessary */
   a_ME_Q = 1,          /* Must quote */
   a_ME_SP = 2,         /* sp */
   a_ME_XF = 3,         /* Special character 'F' - maybe quoted */
   a_ME_XD = 4,         /* Special character '.' - maybe quoted */
   a_ME_UU = 5,         /* In header, _ must be quoted in encoded word */
   a_ME_US = '_',       /* In header, ' ' must be quoted as _ in encoded word */
   a_ME_QM = '?',       /* In header, special character ? not always quoted */
   a_ME_EQ = '=',       /* In header, '=' must be quoted in encoded word */
   a_ME_HT ='\t',       /* Body HT=SP.  Head HT=HT, BUT quote in encoded word */
   a_ME_NL = 0,         /* Don't quote '\n' (NL) */
   a_ME_CR = a_ME_Q     /* Always quote a '\r' (CR) */
};
64 /* Lookup tables to decide whether a character must be encoded or not.
65 * Email header differences according to RFC 2047, section 4.2:
66 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
67 * - don't care about the special ^F[rom] and ^.$ */
68 static ui8_t const a_me_qp_body[] = {
69 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
70 a_ME_Q, a_ME_SP, a_ME_NL, a_ME_Q, a_ME_Q, a_ME_CR, a_ME_Q, a_ME_Q,
71 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
72 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
73 a_ME_SP, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
74 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XD, a_ME_N,
75 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
76 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q, a_ME_N, a_ME_N,
78 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_XF, a_ME_N,
79 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
80 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
81 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
82 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
83 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
84 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
85 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
86 }, a_me_qp_head[] = {
87 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
88 a_ME_Q, a_ME_HT, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
89 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
90 a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q, a_ME_Q,
91 a_ME_US, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
92 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
93 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
94 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_EQ, a_ME_N, a_ME_QM,
96 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
97 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
98 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
99 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_UU,
100 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
101 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
102 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N,
103 a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_N, a_ME_Q,
/* Base64 alphabet: maps a 6-bit value to its character */
static char const a_me_b64_enctbl[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";

/* Reverse mapping for 7-bit characters: the 6-bit value, -2 for the padding
 * character '=', or -1 for anything that is not part of the alphabet.
 * The decoding table is only accessed via a_ME_B64_DECUI8() */
static signed char const a_me_b64__dectbl[] = {
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
   52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
   -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
   15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
   -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
   41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
/* Results of a_ME_B64_DECUI8() other than a plain 6-bit value: the padding
 * character '=', and a character which is not part of the alphabet */
#define a_ME_B64_EQU ((ui32_t)-2)
#define a_ME_B64_BAD ((ui32_t)-1)

/* Decode one character; anything outside the table range is _BAD.
 * Note C is evaluated twice */
#define a_ME_B64_DECUI8(C) \
   ((ui8_t)(C) >= sizeof(a_me_b64__dectbl)\
    ? a_ME_B64_BAD : (ui32_t)a_me_b64__dectbl[(ui8_t)(C)])
/* Names of the known Content-Transfer-Encodings, packed into one string with
 * NUL separators; enum a_me_ctes_off gives the offset and length of each.
 * (Ugly to place an enum here) */
static char const a_me_ctes[] = "7bit\0" "8bit\0"
      "base64\0" "quoted-printable\0" "binary\0"
      /* abbrevs */ "8b\0" "b64\0" "qp\0";
enum a_me_ctes_off{
   a_ME_CTES_7B_OFF = 0, a_ME_CTES_7B_LEN = 4,
   a_ME_CTES_8B_OFF = 5, a_ME_CTES_8B_LEN = 4,
   a_ME_CTES_B64_OFF = 10, a_ME_CTES_B64_LEN = 6,
   a_ME_CTES_QP_OFF = 17,  a_ME_CTES_QP_LEN = 16,
   a_ME_CTES_BIN_OFF = 34, a_ME_CTES_BIN_LEN = 6,

   /* The abbreviations */
   a_ME_CTES_S8B_OFF = 41, a_ME_CTES_S8B_LEN = 2,
   a_ME_CTES_SB64_OFF = 44, a_ME_CTES_SB64_LEN = 3,
   a_ME_CTES_SQP_OFF = 48, a_ME_CTES_SQP_LEN = 2
};
141 /* Check whether *s must be quoted according to flags, else body rules;
142 * sol indicates whether we are at the first character of a line/field */
143 n_INLINE enum a_me_qact a_me_mustquote(char const *s, char const *e,
144 bool_t sol, enum mime_enc_flags flags);
146 /* Trim WS and make work point to the decodable range of in.
147 * Return the amount of bytes a b64_decode operation on that buffer requires,
148 * or UIZ_MAX on overflow error */
149 static size_t a_me_b64_decode_prepare(struct str *work, struct str const *in);
151 /* Perform b64_decode on in(put) to sufficiently spaced out(put).
152 * Return number of useful bytes in out or -1 on error.
153 * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
154 static ssize_t a_me_b64_decode(struct str *out, struct str *in);
156 n_INLINE enum a_me_qact
157 a_me_mustquote(char const *s, char const *e, bool_t sol,
158 enum mime_enc_flags flags){
159 ui8_t const *qtab;
160 enum a_me_qact a, r;
161 NYD2_ENTER;
163 qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
164 ? a_me_qp_head : a_me_qp_body;
166 if((ui8_t)*s > 0x7F){
167 r = a_ME_Q;
168 goto jleave;
171 a = qtab[(ui8_t)*s];
173 if((r = a) == a_ME_N || a == a_ME_Q)
174 goto jleave;
176 r = a_ME_Q;
178 /* Special header fields */
179 if(flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)){
180 /* Special massage for encoded words */
181 if(flags & MIMEEF_ISENCWORD){
182 switch(a){
183 case a_ME_HT:
184 case a_ME_US:
185 case a_ME_EQ:
186 r = a;
187 /* FALLTHRU */
188 case a_ME_UU:
189 goto jleave;
190 default:
191 break;
195 /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
196 * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
197 * should be hard to match */
198 if(a == a_ME_QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
199 goto jleave;
200 goto jnquote;
203 /* Body-only */
205 if(a == a_ME_SP){
206 /* WS only if trailing white space */
207 if(&s[1] == e || s[1] == '\n')
208 goto jleave;
209 goto jnquote;
212 /* Rest are special begin-of-line cases */
213 if(!sol)
214 goto jnquote;
216 /* ^From */
217 if(a == a_ME_XF){
218 if(&s[4] < e && s[1] == 'r' && s[2] == 'o' && s[3] == 'm' && s[4] == ' ')
219 goto jleave;
220 goto jnquote;
222 /* ^.$ */
223 if(a == a_ME_XD && (&s[1] == e || s[1] == '\n'))
224 goto jleave;
225 jnquote:
226 r = 0;
227 jleave:
228 NYD2_LEAVE;
229 return r;
232 static size_t
233 a_me_b64_decode_prepare(struct str *work, struct str const *in){
234 size_t cp_len;
235 NYD2_ENTER;
237 *work = *in;
238 cp_len = n_str_trim(work, n_STR_TRIM_BOTH)->l;
240 if(cp_len > 16){
241 /* n_ERR_OVERFLOW */
242 if(UIZ_MAX / 3 <= cp_len){
243 cp_len = UIZ_MAX;
244 goto jleave;
246 cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
248 cp_len += (2 * 3) +1;
249 jleave:
250 NYD2_LEAVE;
251 return cp_len;
254 static ssize_t
255 a_me_b64_decode(struct str *out, struct str *in){
256 ui8_t *p, pb;
257 ui8_t const *q, *end;
258 ssize_t rv;
259 NYD2_ENTER;
261 rv = -1;
262 p = (ui8_t*)&out->s[out->l];
263 q = (ui8_t const*)in->s;
265 for(end = &q[in->l]; PTR2SIZE(end - q) >= 4; q += 4){
266 ui32_t a, b, c, d;
268 a = a_ME_B64_DECUI8(q[0]);
269 b = a_ME_B64_DECUI8(q[1]);
270 c = a_ME_B64_DECUI8(q[2]);
271 d = a_ME_B64_DECUI8(q[3]);
273 if(n_UNLIKELY(a >= a_ME_B64_EQU || b >= a_ME_B64_EQU ||
274 c == a_ME_B64_BAD || d == a_ME_B64_BAD))
275 goto jleave;
277 pb = ((a << 2) | ((b & 0x30) >> 4));
278 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
279 *p++ = pb;
281 if(c == a_ME_B64_EQU){ /* got '=' */
282 q += 4;
283 if(n_UNLIKELY(d != a_ME_B64_EQU))
284 goto jleave;
285 break;
288 pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
289 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
290 *p++ = pb;
292 if(d == a_ME_B64_EQU) /* got '=' */
293 break;
294 pb = (((c & 0x03) << 6) | d);
295 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
296 *p++ = pb;
298 rv ^= rv;
300 jleave:{
301 size_t i;
303 i = PTR2SIZE((char*)p - out->s);
304 out->l = i;
305 if(rv == 0)
306 rv = (ssize_t)i;
308 in->l -= PTR2SIZE(q - (ui8_t*)in->s);
309 in->s = n_UNCONST(q);
310 NYD2_LEAVE;
311 return rv;
314 FL enum mime_enc
315 mime_enc_target(void){
316 char const *cp, *v15;
317 enum mime_enc rv;
318 NYD2_ENTER;
320 if((v15 = ok_vlook(encoding)) != NULL)
321 n_OBSOLETE(_("please use *mime-encoding* instead of *encoding*"));
323 if((cp = ok_vlook(mime_encoding)) == NULL && (cp = v15) == NULL)
324 rv = MIME_DEFAULT_ENCODING;
325 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_S8B_OFF]) ||
326 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_8B_OFF]))
327 rv = MIMEE_8B;
328 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SB64_OFF]) ||
329 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_B64_OFF]))
330 rv = MIMEE_B64;
331 else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SQP_OFF]) ||
332 !asccasecmp(cp, &a_me_ctes[a_ME_CTES_QP_OFF]))
333 rv = MIMEE_QP;
334 else{
335 n_err(_("Warning: invalid *mime-encoding*, using Base64: %s\n"), cp);
336 rv = MIMEE_B64;
338 NYD2_LEAVE;
339 return rv;
342 FL enum mime_enc
343 mime_enc_from_ctehead(char const *hbody){
344 enum mime_enc rv;
345 NYD2_ENTER;
347 if(hbody == NULL)
348 rv = MIMEE_7B;
349 else{
350 struct{
351 ui8_t off;
352 ui8_t len;
353 ui8_t enc;
354 ui8_t __dummy;
355 } const *cte, cte_base[] = {
356 {a_ME_CTES_7B_OFF, a_ME_CTES_7B_LEN, MIMEE_7B, 0},
357 {a_ME_CTES_8B_OFF, a_ME_CTES_8B_LEN, MIMEE_8B, 0},
358 {a_ME_CTES_B64_OFF, a_ME_CTES_B64_LEN, MIMEE_B64, 0},
359 {a_ME_CTES_QP_OFF, a_ME_CTES_QP_LEN, MIMEE_QP, 0},
360 {a_ME_CTES_BIN_OFF, a_ME_CTES_BIN_LEN, MIMEE_BIN, 0},
361 {0, 0, MIMEE_NONE, 0}
363 union {char const *s; size_t l;} u;
365 if(*hbody == '"')
366 for(u.s = ++hbody; *u.s != '\0' && *u.s != '"'; ++u.s)
368 else
369 for(u.s = hbody; *u.s != '\0' && !whitechar(*u.s); ++u.s)
371 u.l = PTR2SIZE(u.s - hbody);
373 for(cte = cte_base;;)
374 if(cte->len == u.l && !asccasecmp(&a_me_ctes[cte->off], hbody)){
375 rv = cte->enc;
376 break;
377 }else if((++cte)->enc == MIMEE_NONE){
378 rv = MIMEE_NONE;
379 break;
382 NYD2_LEAVE;
383 return rv;
386 FL char const *
387 mime_enc_from_conversion(enum conversion const convert){
388 char const *rv;
389 NYD2_ENTER;
391 switch(convert){
392 case CONV_7BIT: rv = &a_me_ctes[a_ME_CTES_7B_OFF]; break;
393 case CONV_8BIT: rv = &a_me_ctes[a_ME_CTES_8B_OFF]; break;
394 case CONV_TOQP: rv = &a_me_ctes[a_ME_CTES_QP_OFF]; break;
395 case CONV_TOB64: rv = &a_me_ctes[a_ME_CTES_B64_OFF]; break;
396 case CONV_NONE: rv = &a_me_ctes[a_ME_CTES_BIN_OFF]; break;
397 default: rv = n_empty; break;
399 NYD2_LEAVE;
400 return rv;
403 FL size_t
404 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags){
405 size_t rv;
406 bool_t sol;
407 NYD2_ENTER;
409 for(rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
410 switch(a_me_mustquote(ln, ln + lnlen, sol, flags)){
411 case a_ME_US:
412 case a_ME_EQ:
413 case a_ME_HT:
414 assert(flags & MIMEEF_ISENCWORD);
415 /* FALLTHRU */
416 case 0:
417 continue;
418 default:
419 ++rv;
421 NYD2_LEAVE;
422 return rv;
425 FL size_t
426 qp_encode_calc_size(size_t len){
427 size_t bytes, lines;
428 NYD2_ENTER;
430 /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
431 * However, we must be aware that (a) the output may span multiple lines
432 * and (b) the input does not end with a newline itself (nonetheless):
433 * LC_ALL=C awk 'BEGIN{
434 * for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
435 * }' |
436 * s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
438 /* Several n_ERR_OVERFLOW */
439 if(len >= UIZ_MAX / 3){
440 len = UIZ_MAX;
441 goto jleave;
443 bytes = len * 3;
444 lines = bytes / QP_LINESIZE;
445 len += lines;
447 if(len >= UIZ_MAX / 3){
448 len = UIZ_MAX;
449 goto jleave;
451 /* Trailing hard NL may be missing, so there may be two lines.
452 * Thus add soft + hard NL per line and a trailing NUL */
453 bytes = len * 3;
454 lines = (bytes / QP_LINESIZE) + 1;
455 lines <<= 1;
456 ++bytes;
457 /*if(UIZ_MAX - bytes >= lines){
458 len = UIZ_MAX;
459 goto jleave;
461 bytes += lines;
462 len = bytes;
463 jleave:
464 NYD2_LEAVE;
465 return len;
#ifdef notyet
/* qp_encode() the NUL terminated string cp */
FL struct str *
qp_encode_cp(struct str *out, char const *cp, enum qpflags flags){
   struct str src;
   NYD_ENTER;

   src.l = strlen(cp);
   src.s = n_UNCONST(cp);
   out = qp_encode(out, &src, flags);
   NYD_LEAVE;
   return out;
}

/* qp_encode() the vp_len bytes at vp */
FL struct str *
qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
      enum qpflags flags){
   struct str src;
   NYD_ENTER;

   src.l = vp_len;
   src.s = n_UNCONST(vp);
   out = qp_encode(out, &src, flags);
   NYD_LEAVE;
   return out;
}
#endif /* notyet */
495 FL struct str *
496 qp_encode(struct str *out, struct str const *in, enum qpflags flags){
497 size_t lnlen;
498 char *qp;
499 char const *is, *ie;
500 bool_t sol, seenx;
501 NYD_ENTER;
503 sol = (flags & QP_ISHEAD ? FAL0 : TRU1);
505 if(!(flags & QP_BUF)){
506 if((lnlen = qp_encode_calc_size(in->l)) == UIZ_MAX){
507 out = NULL;
508 goto jerr;
510 out->s = (flags & QP_SALLOC) ? n_autorec_alloc(lnlen)
511 : n_realloc(out->s, lnlen);
513 qp = out->s;
514 is = in->s;
515 ie = is + in->l;
517 if(flags & QP_ISHEAD){
518 enum mime_enc_flags ef;
520 ef = MIMEEF_ISHEAD | (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
522 for(seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp){
523 char c;
524 enum a_me_qact mq;
526 mq = a_me_mustquote(is, ie, sol, ef);
527 c = *is++;
529 if(mq == a_ME_N){
530 /* We convert into a single *encoded-word*, that'll end up in
531 * =?C?Q??=; quote '?' from when we're inside there on */
532 if(seenx && c == '?')
533 goto jheadq;
534 *qp = c;
535 }else if(mq == a_ME_US)
536 *qp = a_ME_US;
537 else{
538 seenx = TRU1;
539 jheadq:
540 *qp++ = '=';
541 qp = n_c_to_hex_base16(qp, c) + 1;
544 goto jleave;
547 /* The body needs to take care for soft line breaks etc. */
548 for(lnlen = 0, seenx = FAL0; is < ie; sol = FAL0){
549 char c;
550 enum a_me_qact mq;
552 mq = a_me_mustquote(is, ie, sol, MIMEEF_NONE);
553 c = *is++;
555 if(mq == a_ME_N && (c != '\n' || !seenx)){
556 *qp++ = c;
557 if(++lnlen < QP_LINESIZE - 1)
558 continue;
559 /* Don't write a soft line break when we're in the last possible
560 * column and either an LF has been written or only an LF follows, as
561 * that'll end the line anyway */
562 /* XXX but - ensure is+1>=ie, then??
563 * xxx and/or - what about resetting lnlen; that contra
564 * xxx dicts input==1 input line assertion, though */
565 if(c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
566 continue;
567 jsoftnl:
568 qp[0] = '=';
569 qp[1] = '\n';
570 qp += 2;
571 lnlen = 0;
572 continue;
575 if(lnlen > QP_LINESIZE - 3 - 1){
576 qp[0] = '=';
577 qp[1] = '\n';
578 qp += 2;
579 lnlen = 0;
581 *qp++ = '=';
582 qp = n_c_to_hex_base16(qp, c);
583 qp += 2;
584 lnlen += 3;
585 if(c != '\n' || !seenx)
586 seenx = (c == '\r');
587 else{
588 seenx = FAL0;
589 goto jsoftnl;
593 /* Enforce soft line break if we haven't seen LF */
594 if(in->l > 0 && *--is != '\n'){
595 qp[0] = '=';
596 qp[1] = '\n';
597 qp += 2;
599 jleave:
600 out->l = PTR2SIZE(qp - out->s);
601 out->s[out->l] = '\0';
602 jerr:
603 NYD_LEAVE;
604 return out;
607 FL bool_t
608 qp_decode_header(struct str *out, struct str const *in){
609 struct n_string s;
610 char const *is, *ie;
611 NYD_ENTER;
613 /* n_ERR_OVERFLOW */
614 if(UIZ_MAX -1 - out->l <= in->l ||
615 SI32_MAX <= out->l + in->l){ /* XXX wrong, we may replace */
616 out->l = 0;
617 out = NULL;
618 goto jleave;
621 n_string_creat(&s);
622 n_string_reserve(n_string_take_ownership(&s, out->s,
623 (out->l == 0 ? 0 : out->l +1), out->l),
624 in->l + (in->l >> 2));
626 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
627 si32_t c;
629 c = *is++;
630 if(c == '='){
631 if(is >= ie){
632 goto jpushc; /* TODO According to RFC 2045, 6.7,
633 * ++is; TODO we should warn the user, but have no context
634 * goto jehead; TODO to do so; can't over and over */
635 }else if((c = n_c_from_hex_base16(is)) >= 0){
636 is += 2;
637 goto jpushc;
638 }else{
639 /* Invalid according to RFC 2045, section 6.7 */
640 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
641 c = '=';
642 goto jpushc;
643 /* TODO jehead:
644 * TODO if(n_psonce & n_PSO_UNICODE)
645 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
646 * TODO else{
647 * TODO c = '?';
648 * TODO goto jpushc;
649 * TODO }*/
651 }else{
652 jpushc:
653 if(c == '_' /* a_ME_US */)
654 c = ' ';
655 n_string_push_c(&s, (char)c);
659 out->s = n_string_cp(&s);
660 out->l = s.s_len;
661 n_string_gut(n_string_drop_ownership(&s));
662 jleave:
663 NYD_LEAVE;
664 return (out != NULL);
667 FL bool_t
668 qp_decode_part(struct str *out, struct str const *in, struct str *outrest,
669 struct str *inrest_or_null){
670 struct n_string s, *sp;
671 char const *is, *ie;
672 NYD_ENTER;
674 if(outrest->l != 0){
675 is = out->s;
676 *out = *outrest;
677 outrest->s = n_UNCONST(is);
678 outrest->l = 0;
681 /* n_ERR_OVERFLOW */
682 if(UIZ_MAX -1 - out->l <= in->l ||
683 SI32_MAX <= out->l + in->l) /* XXX wrong, we may replace */
684 goto jerr;
686 sp = n_string_creat(&s);
687 sp = n_string_take_ownership(sp, out->s,
688 (out->l == 0 ? 0 : out->l +1), out->l);
689 sp = n_string_reserve(sp, in->l + (in->l >> 2));
691 for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
692 si32_t c;
694 if((c = *is++) != '='){
695 jpushc:
696 n_string_push_c(sp, (char)c);
697 continue;
700 /* RFC 2045, 6.7:
701 * Therefore, when decoding a Quoted-Printable body, any
702 * trailing white space on a line must be deleted, as it will
703 * necessarily have been added by intermediate transport
704 * agents */
705 for(; is <= ie && blankchar(*is); ++is)
707 if(is >= ie){
708 /* Soft line break? */
709 if(*is == '\n')
710 goto jsoftnl;
711 goto jpushc; /* TODO According to RFC 2045, 6.7,
712 * ++is; TODO we should warn the user, but have no context
713 * goto jebody; TODO to do so; can't over and over */
716 /* Not a soft line break? */
717 if(*is != '\n'){
718 if((c = n_c_from_hex_base16(is)) >= 0){
719 is += 2;
720 goto jpushc;
722 /* Invalid according to RFC 2045, section 6.7 */
723 /* TODO Follow RFC 2045, 6.7 advise and simply put through */
724 c = '=';
725 goto jpushc;
726 /* TODO jebody:
727 * TODO if(n_psonce & n_PSO_UNICODE)
728 * n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
729 * TODO else{
730 * TODO c = '?';
731 * TODO goto jpushc;
732 * TODO }*/
735 /* CRLF line endings are encoded as QP, followed by a soft line break, so
736 * check for this special case, and simply forget we have seen one, so as
737 * not to end up with the entire DOS file in a contiguous buffer */
738 jsoftnl:
739 if(sp->s_len > 0 && sp->s_dat[sp->s_len - 1] == '\n'){
740 #if 0 /* TODO qp_decode_part() we do not normalize CRLF
741 * TODO to LF because for that we would need
742 * TODO to know if we are about to write to
743 * TODO the display or do save the file!
744 * TODO 'hope the MIME/send layer rewrite will
745 * TODO offer the possibility to DTRT */
746 if(sp->s_len > 1 && sp->s_dat[sp->s_len - 2] == '\r')
747 n_string_push_c(n_string_trunc(sp, sp->s_len - 2), '\n');
748 #endif
749 break;
752 /* C99 */{
753 char *cp;
754 size_t l;
756 if((l = PTR2SIZE(ie - is)) > 0){
757 if(inrest_or_null == NULL)
758 goto jerr;
759 n_str_assign_buf(inrest_or_null, is, l);
761 cp = outrest->s;
762 outrest->s = n_string_cp(sp);
763 outrest->l = s.s_len;
764 n_string_drop_ownership(sp);
765 if(cp != NULL)
766 n_free(cp);
768 break;
771 out->s = n_string_cp(sp);
772 out->l = sp->s_len;
773 n_string_gut(n_string_drop_ownership(sp));
774 jleave:
775 NYD_LEAVE;
776 return (out != NULL);
777 jerr:
778 out->l = 0;
779 out = NULL;
780 goto jleave;
783 FL size_t
784 b64_encode_calc_size(size_t len){
785 NYD2_ENTER;
786 if(len >= UIZ_MAX / 4)
787 len = UIZ_MAX;
788 else{
789 len = (len * 4) / 3;
790 len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
791 len += 2 + 1; /* CRLF, \0 */
793 NYD2_LEAVE;
794 return len;
797 FL struct str *
798 b64_encode(struct str *out, struct str const *in, enum b64flags flags){
799 ui8_t const *p;
800 size_t i, lnlen;
801 char *b64;
802 NYD_ENTER;
804 assert(!(flags & B64_NOPAD) ||
805 !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
807 p = (ui8_t const*)in->s;
809 if(!(flags & B64_BUF)){
810 if((i = b64_encode_calc_size(in->l)) == UIZ_MAX){
811 out = NULL;
812 goto jleave;
814 out->s = (flags & B64_SALLOC) ? n_autorec_alloc(i)
815 : n_realloc(out->s, i);
817 b64 = out->s;
819 if(!(flags & (B64_CRLF | B64_LF)))
820 flags &= ~B64_MULTILINE;
822 for(lnlen = 0, i = in->l; (ssize_t)i > 0; p += 3, i -= 3){
823 ui32_t a, b, c;
825 a = p[0];
826 b64[0] = a_me_b64_enctbl[a >> 2];
828 switch(i){
829 case 1:
830 b64[1] = a_me_b64_enctbl[((a & 0x3) << 4)];
831 b64[2] =
832 b64[3] = '=';
833 break;
834 case 2:
835 b = p[1];
836 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
837 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2)];
838 b64[3] = '=';
839 break;
840 default:
841 b = p[1];
842 c = p[2];
843 b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
844 b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
845 b64[3] = a_me_b64_enctbl[c & 0x3F];
846 break;
849 b64 += 4;
850 if(!(flags & B64_MULTILINE))
851 continue;
852 lnlen += 4;
853 if(lnlen < B64_LINESIZE)
854 continue;
856 lnlen = 0;
857 if(flags & B64_CRLF)
858 *b64++ = '\r';
859 if(flags & (B64_CRLF | B64_LF))
860 *b64++ = '\n';
863 if((flags & (B64_CRLF | B64_LF)) &&
864 (!(flags & B64_MULTILINE) || lnlen != 0)){
865 if(flags & B64_CRLF)
866 *b64++ = '\r';
867 if(flags & (B64_CRLF | B64_LF))
868 *b64++ = '\n';
869 }else if(flags & B64_NOPAD)
870 while(b64 != out->s && b64[-1] == '=')
871 --b64;
873 out->l = PTR2SIZE(b64 - out->s);
874 out->s[out->l] = '\0';
876 /* Base64 includes + and /, replace them with _ and -.
877 * This is base64url according to RFC 4648, then. Since we only support
878 * that for encoding and it is only used for boundary strings, this is
879 * yet a primitive implementation; xxx use tables; support decoding */
880 if(flags & B64_RFC4648URL){
881 char c;
883 for(b64 = out->s; (c = *b64) != '\0'; ++b64)
884 if(c == '+')
885 *b64 = '-';
886 else if(c == '/')
887 *b64 = '_';
889 jleave:
890 NYD_LEAVE;
891 return out;
894 FL struct str *
895 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
896 enum b64flags flags){
897 struct str in;
898 NYD_ENTER;
900 in.s = n_UNCONST(vp);
901 in.l = vp_len;
902 out = b64_encode(out, &in, flags);
903 NYD_LEAVE;
904 return out;
#ifdef notyet
/* b64_encode() the NUL terminated string cp */
FL struct str *
b64_encode_cp(struct str *out, char const *cp, enum b64flags flags){
   struct str src;
   NYD_ENTER;

   src.l = strlen(cp);
   src.s = n_UNCONST(cp);
   out = b64_encode(out, &src, flags);
   NYD_LEAVE;
   return out;
}
#endif /* notyet */
921 FL bool_t
922 b64_decode(struct str *out, struct str const *in){
923 struct str work;
924 size_t len;
925 NYD_ENTER;
927 out->l = 0;
929 if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
930 goto jerr;
932 /* Ignore an empty input, as may happen for an empty final line */
933 if(work.l == 0)
934 out->s = n_realloc(out->s, 1);
935 else if(work.l >= 4 && !(work.l & 3)){
936 out->s = n_realloc(out->s, len +1);
937 if((ssize_t)(len = a_me_b64_decode(out, &work)) < 0)
938 goto jerr;
939 }else
940 goto jerr;
941 out->s[out->l] = '\0';
942 jleave:
943 NYD_LEAVE;
944 return (out != NULL);
945 jerr:
946 out = NULL;
947 goto jleave;
950 FL bool_t
951 b64_decode_header(struct str *out, struct str const *in){
952 struct str outr, inr;
953 NYD_ENTER;
955 if(!b64_decode(out, in)){
956 memset(&outr, 0, sizeof outr);
957 memset(&inr, 0, sizeof inr);
959 if(!b64_decode_part(out, in, &outr, &inr) || outr.l > 0 || inr.l > 0)
960 out = NULL;
962 if(inr.s != NULL)
963 n_free(inr.s);
964 if(outr.s != NULL)
965 n_free(outr.s);
967 NYD_LEAVE;
968 return (out != NULL);
971 FL bool_t
972 b64_decode_part(struct str *out, struct str const *in, struct str *outrest,
973 struct str *inrest_or_null){
974 struct str work, save;
975 ui32_t a, b, c, b64l;
976 char ca, cb, cc, cx;
977 struct n_string s, workbuf;
978 size_t len;
979 NYD_ENTER;
981 n_string_creat(&s);
982 if((len = out->l) > 0 && out->s[len] == '\0')
983 (void)n_string_take_ownership(&s, out->s, len +1, len);
984 else{
985 if(len > 0)
986 n_string_push_buf(&s, out->s, len);
987 if(out->s != NULL)
988 n_free(out->s);
990 out->s = NULL, out->l = 0;
991 n_string_creat(&workbuf);
993 if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
994 goto jerr;
996 if(outrest->l > 0){
997 n_string_push_buf(&s, outrest->s, outrest->l);
998 outrest->l = 0;
1001 /* n_ERR_OVERFLOW */
1002 if(UIZ_MAX - len <= s.s_len ||
1003 SI32_MAX <= len + s.s_len) /* XXX wrong, we may replace */
1004 goto jerr;
1006 if(work.l == 0)
1007 goto jok;
1009 /* This text decoder is extremely expensive, especially given that in all
1010 * but _invalid_ cases it is not even needed! So try once to do the normal
1011 * decoding, if that fails, go the hard way */
1012 save = work;
1013 out->s = n_string_resize(&s, len + (out->l = b64l = s.s_len))->s_dat;
1015 if(work.l >= 4 && a_me_b64_decode(out, &work) >= 0){
1016 n_string_trunc(&s, out->l);
1017 if(work.l == 0)
1018 goto jok;
1021 n_string_trunc(&s, b64l);
1022 work = save;
1023 out->s = NULL, out->l = 0;
1025 /* TODO b64_decode_part() does not yet STOP if it sees padding, whereas
1026 * TODO OpenSSL and mutt simply bail on such stuff */
1027 n_UNINIT(ca, 0);
1028 n_UNINIT(cb, 0);
1029 n_UNINIT(cc, 0);
1030 for(b64l = 0;;){
1031 ui32_t x;
1033 x = a_ME_B64_DECUI8((ui8_t)(cx = *work.s));
1034 switch(b64l){
1035 case 0:
1036 if(x >= a_ME_B64_EQU)
1037 goto jrepl;
1038 ca = cx;
1039 a = x;
1040 ++b64l;
1041 break;
1042 case 1:
1043 if(x >= a_ME_B64_EQU)
1044 goto jrepl;
1045 cb = cx;
1046 b = x;
1047 ++b64l;
1048 break;
1049 case 2:
1050 if(x == a_ME_B64_BAD)
1051 goto jrepl;
1052 cc = cx;
1053 c = x;
1054 ++b64l;
1055 break;
1056 case 3:
1057 if(x == a_ME_B64_BAD){
1058 jrepl:
1059 /* TODO This would be wrong since iconv(3) may be applied first! */
1060 #if 0
1061 if(n_psonce & n_PSO_UNICODE)
1062 n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
1063 else
1064 n_string_push_c(&s, '?');
1065 #endif
1067 }else if(c == a_ME_B64_EQU && x != a_ME_B64_EQU){
1068 /* This is not only invalid but bogus. Skip it over! */
1069 /* TODO This would be wrong since iconv(3) may be applied first! */
1070 #if 0
1071 n_string_push_buf(&s, n_UNIREPL n_UNIREPL n_UNIREPL n_UNIREPL,
1072 (sizeof(n_UNIREPL) -1) * 4);
1073 #endif
1074 b64l = 0;
1075 }else{
1076 ui8_t pb;
1078 pb = ((a << 2) | ((b & 0x30) >> 4));
1079 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1080 n_string_push_c(&s, (char)pb);
1081 pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
1082 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1083 n_string_push_c(&s, (char)pb);
1084 if(x != a_ME_B64_EQU){
1085 pb = (((c & 0x03) << 6) | x);
1086 if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1087 n_string_push_c(&s, (char)pb);
1089 ++b64l;
1091 break;
1094 ++work.s;
1095 if(--work.l == 0){
1096 if(b64l > 0 && b64l != 4){
1097 if(inrest_or_null == NULL)
1098 goto jerr;
1099 inrest_or_null->s = n_realloc(inrest_or_null->s, b64l +1);
1100 inrest_or_null->s[0] = ca;
1101 if(b64l > 1)
1102 inrest_or_null->s[1] = cb;
1103 if(b64l > 2)
1104 inrest_or_null->s[2] = cc;
1105 inrest_or_null->s[inrest_or_null->l = b64l] = '\0';
1107 goto jok;
1109 if(b64l == 4)
1110 b64l = 0;
1113 jok:
1114 out->s = n_string_cp(&s);
1115 out->l = s.s_len;
1116 n_string_drop_ownership(&s);
1117 jleave:
1118 n_string_gut(&workbuf);
1119 n_string_gut(&s);
1120 NYD_LEAVE;
1121 return (out != NULL);
1122 jerr:
1123 out = NULL;
1124 goto jleave;
1127 /* s-it-mode */