Tweak previous, it added a bad memory access
[s-mailx.git] / mime.c
blobe482c95cdb23836b3456890cd7caa097b38f8bec
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ MIME support functions.
3 *@ TODO Complete rewrite.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 2000
10 * Gunnar Ritter. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Gunnar Ritter
23 * and his contributors.
24 * 4. Neither the name of Gunnar Ritter nor the names of his contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY GUNNAR RITTER AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL GUNNAR RITTER OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
40 #undef n_FILE
41 #define n_FILE mime
43 #ifndef HAVE_AMALGAMATION
44 # include "nail.h"
45 #endif
/* Tells mime_write_tohdr() how to wrap what it writes: not at all, as a
 * parenthesized comment, or as a double-quoted string.
 * (Don't ask, but it keeps body and soul together) */
enum a_mime_structure_hack{
   a_MIME_SH_NONE,
   a_MIME_SH_COMMENT,
   a_MIME_SH_QUOTE
};
/* Character set iterator state: _CS_ITER_STEP() advances _cs_iter by running
 * n_strsep() with ',' over _cs_iter_base; charset_iter_reset() builds that
 * comma-separated list */
54 static char *_cs_iter_base, *_cs_iter;
/* _CS_ITER_GET(): the current iterator entry, or, if _cs_iter is NULL, a
 * configured fallback character set (which one depends on HAVE_ICONV) */
55 #ifdef HAVE_ICONV
56 # define _CS_ITER_GET() \
57 ((_cs_iter != NULL) ? _cs_iter : ok_vlook(CHARSET_8BIT_OKEY))
58 #else
59 # define _CS_ITER_GET() ((_cs_iter != NULL) ? _cs_iter : ok_vlook(ttycharset))
60 #endif
61 #define _CS_ITER_STEP() _cs_iter = n_strsep(&_cs_iter_base, ',', TRU1)
63 /* Is 7-bit enough? */
64 #ifdef HAVE_ICONV
65 static bool_t _has_highbit(char const *s);
66 static bool_t _name_highbit(struct name *np);
67 #endif
69 /* fwrite(3) while checking for displayability */
70 static ssize_t _fwrite_td(struct str const *input, enum tdflags flags,
71 struct str *outrest, struct quoteflt *qf);
73 /* Convert header fields to RFC 2047 format and write to the file fo */
74 static ssize_t mime_write_tohdr(struct str *in, FILE *fo,
75 size_t *colp, enum a_mime_structure_hack msh);
77 /* Write len characters of the passed string to the passed file, doing charset
78 * and header conversion */
80 /* Write an address to a header field */
81 static ssize_t mime_write_tohdr_a(struct str *in, FILE *f,
82 size_t *colp);
/* With iconv: convert the input through the open descriptor (if any), then
 * call mime_write_tohdr(); without iconv it is a plain alias for the latter */
83 #ifdef HAVE_ICONV
84 static ssize_t a_mime__convhdra(struct str *inp, FILE *fp, size_t *colp,
85 enum a_mime_structure_hack msh);
86 #else
87 # define a_mime__convhdra(S,F,C,MSH) mime_write_tohdr(S, F, C, MSH)
88 #endif
90 /* Append to buf, handling resizing */
91 static void _append_str(char **buf, size_t *sz, size_t *pos,
92 char const *str, size_t len);
/* Like _append_str(), but the data first passes through mime_fromhdr() */
93 static void _append_conv(char **buf, size_t *sz, size_t *pos,
94 char const *str, size_t len);
96 #ifdef HAVE_ICONV
97 static bool_t
98 _has_highbit(char const *s)
100 bool_t rv = TRU1;
101 NYD_ENTER;
103 if (s) {
105 if ((ui8_t)*s & 0x80)
106 goto jleave;
107 while (*s++ != '\0');
109 rv = FAL0;
110 jleave:
111 NYD_LEAVE;
112 return rv;
115 static bool_t
116 _name_highbit(struct name *np)
118 bool_t rv = TRU1;
119 NYD_ENTER;
121 while (np) {
122 if (_has_highbit(np->n_name) || _has_highbit(np->n_fullname))
123 goto jleave;
124 np = np->n_flink;
126 rv = FAL0;
127 jleave:
128 NYD_LEAVE;
129 return rv;
131 #endif /* HAVE_ICONV */
133 static sigjmp_buf __mimefwtd_actjmp; /* TODO someday.. */
134 static int __mimefwtd_sig; /* TODO someday.. */
135 static sighandler_type __mimefwtd_opipe;
136 static void
137 __mimefwtd_onsig(int sig) /* TODO someday, we won't need it no more */
139 NYD_X; /* Signal handler */
140 __mimefwtd_sig = sig;
141 siglongjmp(__mimefwtd_actjmp, 1);
/* _fwrite_td(): hand *input* to quoteflt_push() on *qf*, optionally
 * transforming it first: iconv conversion (TD_ICONV and an open descriptor),
 * makeprint() (TD_ISPR) and delctrl() (TD_DELCTRL).
 * Data that cannot be processed yet (bytes iconv did not consume, or a
 * trailing partial line) is stored in *outrest*, which may be NULL.
 * Returns what quoteflt_push() returned, or 0 if nothing was pushed because
 * everything was buffered or a SIGPIPE arrived; the signal is raised again
 * after the cleanup */
144 static ssize_t
145 _fwrite_td(struct str const *input, enum tdflags flags, struct str *outrest,
146 struct quoteflt *qf)
148 /* TODO note: after send/MIME layer rewrite we will have a string pool
149 * TODO so that memory allocation count drops down massively; for now,
150 * TODO v14.* that is, we pay a lot & heavily depend on the allocator */
151 /* TODO well if we get a broken pipe here, and it happens to
152 * TODO happen pretty easy when sleeping in a full pipe buffer,
153 * TODO then the current codebase performs longjump away;
154 * TODO this leaves memory leaks behind ('think up to 3 per,
155 * TODO dep. upon alloca availability). For this to be fixed
156 * TODO we either need to get rid of the longjmp()s (tm) or
157 * TODO the storage must come from the outside or be tracked
158 * TODO in a carrier struct. Best both. But storage reuse
159 * TODO would be a bigbig win besides */
160 /* *input* _may_ point to non-modifyable buffer; but even then it only
161 * needs to be dup'ed away if we have to transform the content */
162 struct str in, out;
163 ssize_t rv;
164 NYD_ENTER;
165 n_UNUSED(outrest);
167 in = *input;
168 out.s = NULL;
169 out.l = 0;
171 #ifdef HAVE_ICONV
172 if ((flags & TD_ICONV) && iconvd != (iconv_t)-1) {
173 char *buf = NULL;
/* Prepend what an earlier call left behind in *outrest* */
175 if (outrest != NULL && outrest->l > 0) {
176 in.l = outrest->l + input->l;
177 in.s = buf = smalloc(in.l +1);
178 memcpy(in.s, outrest->s, outrest->l);
179 memcpy(&in.s[outrest->l], input->s, input->l);
180 outrest->l = 0;
/* Conversion failed with input left over: replace it at EOF, else keep it */
183 if (n_iconv_str(iconvd, n_ICONV_UNIDEFAULT, &out, &in, &in) != 0 &&
184 outrest != NULL && in.l > 0) {
185 n_iconv_reset(iconvd);
186 /* Incomplete multibyte at EOF is special */
187 if (flags & _TD_EOF) {
188 out.s = srealloc(out.s, out.l + sizeof(n_unirepl));
189 if(n_psonce & n_PSO_UNICODE){
190 memcpy(&out.s[out.l], n_unirepl, sizeof(n_unirepl) -1);
191 out.l += sizeof(n_unirepl) -1;
192 }else
193 out.s[out.l++] = '?';
194 } else
195 n_str_add(outrest, &in);
/* The converted data becomes the input of the steps below */
197 in = out;
198 out.l = 0;
199 out.s = NULL;
200 flags &= ~_TD_BUFCOPY;
202 if (buf != NULL)
203 free(buf);
204 }else
205 #endif
206 /* Else, if we will modify the data bytes and thus introduce the potential
207 * of messing up multibyte sequences which become splitted over buffer
208 * boundaries TODO and unless we don't have our filter chain which will
209 * TODO make these hacks go by, buffer data until we see a NL */
210 if((flags & (TD_ISPR | TD_DELCTRL)) && outrest != NULL &&
211 #ifdef HAVE_ICONV
212 iconvd == (iconv_t)-1 &&
213 #endif
214 (!(flags & _TD_EOF) || outrest->l > 0)
216 size_t i;
217 char *cp;
/* Search backwards for the byte after the last newline */
219 for (cp = &in.s[in.l]; cp > in.s && cp[-1] != '\n'; --cp)
221 i = PTR2SIZE(cp - in.s);
223 if (i != in.l) {
224 if (i > 0) {
/* Park the partial line and continue with a private copy of the complete
 * lines only.
 * NOTE(review): n_str_assign_buf() presumably replaces earlier *outrest*
 * content, which is not written in this branch -- confirm */
225 n_str_assign_buf(outrest, cp, in.l - i);
226 cp = smalloc(i +1);
227 memcpy(cp, in.s, in.l = i);
228 (in.s = cp)[in.l = i] = '\0';
229 flags &= ~_TD_BUFCOPY;
230 } else {
/* No newline at all: buffer the entire input and write nothing */
231 n_str_add_buf(outrest, input->s, input->l);
232 rv = 0;
233 goto jleave;
238 if (flags & TD_ISPR)
239 makeprint(&in, &out);
240 else if (flags & _TD_BUFCOPY)
241 n_str_dup(&out, &in);
242 else
243 out = in;
244 if (flags & TD_DELCTRL)
245 out.l = delctrl(out.s, out.l);
/* Catch SIGPIPE for the duration of the push */
247 __mimefwtd_sig = 0;
248 __mimefwtd_opipe = safe_signal(SIGPIPE, &__mimefwtd_onsig);
249 if (sigsetjmp(__mimefwtd_actjmp, 1)) {
250 rv = 0;
251 goto j__sig;
254 rv = quoteflt_push(qf, out.s, out.l);
256 j__sig:
/* Only release buffers which are distinct from what was passed in */
257 if (out.s != in.s)
258 free(out.s);
259 if (in.s != input->s)
260 free(in.s);
261 safe_signal(SIGPIPE, __mimefwtd_opipe);
262 if (__mimefwtd_sig != 0)
263 n_raise(__mimefwtd_sig);
264 jleave:
265 NYD_LEAVE;
266 return rv;
/* mime_write_tohdr(): write the header body *in* to *fo*, word by word.
 * A word for which mime_enc_mustquote() reports bytes to quote, or which is
 * longer than a_MAXCOL_NENC, is emitted as RFC 2047 encoded word(s), base64
 * or quoted-printable; anything else is copied as-is.  Lines are folded
 * along the way.
 * *colp* (may be NULL) provides the start column and receives the final
 * one; a start column of 0 is replaced by the length of "Mail-Followup-To: ".
 * With msh != a_MIME_SH_NONE the output is wrapped in ( ) or " ".
 * Returns the byte count accumulated in sz, which is set to -1 when
 * b64_encode() or qp_encode() returns NULL */
269 static ssize_t
270 mime_write_tohdr(struct str *in, FILE *fo, size_t *colp,
271 enum a_mime_structure_hack msh)
273 /* TODO mime_write_tohdr(): we don't know the name of our header->maxcol..
274 * TODO MIME/send layer rewrite: more available state!!
275 * TODO Because of this we cannot make a difference in between structured
276 * TODO and unstructured headers (RFC 2047, 5. (2))
277 * TODO This means, e.g., that this gets called multiple times for a
278 * TODO structured header and always starts thinking it is at column 0.
279 * TODO I.e., it may get called for only the content of a comment etc.,
280 * TODO not knowing anything of its context.
281 * TODO Instead we should have a list of header body content tokens,
282 * TODO convert them, and then dump the converted tokens, breaking lines.
283 * TODO I.e., get rid of convhdra, mime_write_tohdr_a and such...
284 * TODO Somewhen, the following should produce smooth stuff:
285 * TODO ' "Hallo\"," Dr. Backe "Bl\"ö\"d" (Gell) <ha@llöch.en>
286 * TODO "Nochm\"a\"l"<ta@tu.da>(Dümm)'
287 * TODO NOT MULTIBYTE SAFE IF AN ENCODED WORD HAS TO BE SPLITTED!
288 * TODO To be better we had to mbtowc_l() (non-std! and no locale!!) and
289 * TODO work char-wise! -> S-CText..
290 * TODO The real problem for STD compatibility is however that "in" is
291 * TODO already iconv(3) encoded to the target character set! We could
292 * TODO also solve it (very expensively!) if we would narrow down to an
293 * TODO encoded word and then iconv(3)+MIME encode in one go, in which
294 * TODO case multibyte errors could be catched! */
295 enum {
296 /* Maximum line length */
297 a_MAXCOL_NENC = MIME_LINELEN,
298 a_MAXCOL = MIME_LINELEN_RFC2047
301 struct str cout, cin;
/* Flags below _RND_SHIFT survive from word to word, the others are reset at
 * the start of each loop round (flags &= _RND_MASK) */
302 enum {
303 _FIRST = 1<<0, /* Nothing written yet, start of string */
304 _MSH_NOTHING = 1<<1, /* Now, really: nothing at all has been written */
305 a_ANYENC = 1<<2, /* We have RFC 2047 anything at least once */
306 _NO_QP = 1<<3, /* No quoted-printable allowed */
307 _NO_B64 = 1<<4, /* Ditto, base64 */
308 _ENC_LAST = 1<<5, /* Last round generated encoded word */
309 _SHOULD_BEE = 1<<6, /* Avoid lines longer than SHOULD via encoding */
310 _RND_SHIFT = 7,
311 _RND_MASK = (1<<_RND_SHIFT) - 1,
312 _SPACE = 1<<(_RND_SHIFT+1), /* Leading whitespace */
313 _8BIT = 1<<(_RND_SHIFT+2), /* High bit set */
314 _ENCODE = 1<<(_RND_SHIFT+3), /* Need encoding */
315 _ENC_B64 = 1<<(_RND_SHIFT+4), /* - let it be base64 */
316 _OVERLONG = 1<<(_RND_SHIFT+5) /* Temporarily rised limit */
317 } flags;
318 char const *cset7, *cset8, *wbot, *upper, *wend, *wcur;
319 ui32_t cset7_len, cset8_len;
320 size_t col, i, j;
321 ssize_t sz;
323 NYD_ENTER;
325 cout.s = NULL, cout.l = 0;
326 cset7 = ok_vlook(charset_7bit);
327 cset7_len = (ui32_t)strlen(cset7);
328 cset8 = _CS_ITER_GET(); /* TODO MIME/send layer: iter active? iter! else */
329 cset8_len = (ui32_t)strlen(cset8);
331 flags = _FIRST;
332 if(msh != a_MIME_SH_NONE)
333 flags |= _MSH_NOTHING;
335 /* RFC 1468, "MIME Considerations":
336 * ISO-2022-JP may also be used in MIME Part 2 headers. The "B"
337 * encoding should be used with ISO-2022-JP text. */
338 /* TODO of course, our current implementation won't deal properly with
339 * TODO any stateful encoding at all... (the standard says each encoded
340 * TODO word must include all necessary reset sequences..., i.e., each
341 * TODO encoded word must be a self-contained iconv(3) life cycle) */
342 if (!asccasecmp(cset8, "iso-2022-jp") || mime_enc_target() == MIMEE_B64)
343 flags |= _NO_QP;
345 wbot = in->s;
346 upper = wbot + in->l;
347 sz = 0;
349 if(colp == NULL || (col = *colp) == 0)
350 col = sizeof("Mail-Followup-To: ") -1; /* TODO dreadful thing */
352 /* The user may specify empty quoted-strings or comments, keep them! */
353 if(wbot == upper) {
354 if(flags & _MSH_NOTHING){
355 flags &= ~_MSH_NOTHING;
356 putc((msh == a_MIME_SH_COMMENT ? '(' : '"'), fo);
357 sz = 1;
358 ++col;
/* One round per word: [wbot, wcur) is leading whitespace, [wcur, wend) the
 * word itself */
360 } else for (; wbot < upper; flags &= ~_FIRST, wbot = wend) {
361 flags &= _RND_MASK;
362 wcur = wbot;
363 while (wcur < upper && whitechar(*wcur)) {
364 flags |= _SPACE;
365 ++wcur;
368 /* Any occurrence of whitespace resets prevention of lines >SHOULD via
369 * enforced encoding (xxx SHOULD, but.. encoding is expensive!!) */
370 if (flags & _SPACE)
371 flags &= ~_SHOULD_BEE;
373 /* Data ends with WS - dump it and done.
374 * Also, if we have seen multiple successive whitespace characters, then
375 * if there was no encoded word last, i.e., if we can simply take them
376 * over to the output as-is, keep one WS for possible later separation
377 * purposes and simply print the others as-is, directly! */
378 if (wcur == upper) {
379 wend = wcur;
380 goto jnoenc_putws;
382 if ((flags & (_ENC_LAST | _SPACE)) == _SPACE && wcur - wbot > 1) {
383 wend = wcur - 1;
384 goto jnoenc_putws;
387 /* Skip over a word to next non-whitespace, keep track along the way
388 * whether our 7-bit charset suffices to represent the data */
389 for (wend = wcur; wend < upper; ++wend) {
390 if (whitechar(*wend))
391 break;
392 if ((uc_i)*wend & 0x80)
393 flags |= _8BIT;
396 /* Decide whether the range has to become encoded or not */
397 i = PTR2SIZE(wend - wcur);
398 j = mime_enc_mustquote(wcur, i, MIMEEF_ISHEAD);
399 /* If it just cannot fit on a SHOULD line length, force encode */
400 if (i > a_MAXCOL_NENC) {
401 flags |= _SHOULD_BEE; /* (Sigh: SHOULD only, not MUST..) */
402 goto j_beejump;
404 if ((flags & _SHOULD_BEE) || j > 0) {
405 j_beejump:
406 flags |= _ENCODE;
407 /* Use base64 if requested or more than 50% -37.5-% of the bytes of
408 * the string need to be encoded */
409 if ((flags & _NO_QP) || j >= i >> 1)/*(i >> 2) + (i >> 3))*/
410 flags |= _ENC_B64;
412 DBG( if (flags & _8BIT) assert(flags & _ENCODE); )
414 if (!(flags & _ENCODE)) {
415 /* Encoded word produced, but no linear whitespace for necessary RFC
416 * 2047 separation? Generate artificial data (bad standard!) */
417 if ((flags & (_ENC_LAST | _SPACE)) == _ENC_LAST) {
418 if (col >= a_MAXCOL) {
419 putc('\n', fo);
420 ++sz;
421 col = 0;
423 if(flags & _MSH_NOTHING){
424 flags &= ~_MSH_NOTHING;
425 putc((msh == a_MIME_SH_COMMENT ? '(' : '"'), fo);
426 ++sz;
427 ++col;
429 putc(' ', fo);
430 ++sz;
431 ++col;
434 jnoenc_putws:
435 flags &= ~_ENC_LAST;
437 /* todo No effort here: (1) v15.0 has to bring complete rewrite,
438 * todo (2) the standard is braindead and (3) usually this is one
439 * todo word only, and why be smarter than the standard? */
440 jnoenc_retry:
441 i = PTR2SIZE(wend - wbot);
442 if (i + col + ((flags & _MSH_NOTHING) != 0) <=
443 (flags & _OVERLONG ? MIME_LINELEN_MAX
444 : (flags & a_ANYENC ? a_MAXCOL : a_MAXCOL_NENC))) {
445 if(flags & _MSH_NOTHING){
446 flags &= ~_MSH_NOTHING;
447 putc((msh == a_MIME_SH_COMMENT ? '(' : '"'), fo);
448 ++sz;
449 ++col;
451 i = fwrite(wbot, sizeof *wbot, i, fo);
452 sz += i;
453 col += i;
454 continue;
457 /* Doesn't fit, try to break the line first; */
458 if (col > 1) {
459 putc('\n', fo);
460 if (whitechar(*wbot)) {
461 putc((uc_i)*wbot, fo);
462 ++wbot;
463 } else
464 putc(' ', fo); /* Bad standard: artificial data! */
465 sz += 2;
466 col = 1;
467 if(flags & _MSH_NOTHING){
468 flags &= ~_MSH_NOTHING;
469 putc((msh == a_MIME_SH_COMMENT ? '(' : '"'), fo);
470 ++sz;
471 ++col;
473 flags |= _OVERLONG;
474 goto jnoenc_retry;
477 /* It is so long that it needs to be broken, effectively causing
478 * artificial spaces to be inserted (bad standard), yuck */
479 /* todo This is not multibyte safe, as above; and completely stupid
480 * todo P.S.: our _SHOULD_BEE prevents these cases in the meanwhile */
481 /* FIXME n_PSO_UNICODE and parse using UTF-8 sync possibility! */
482 wcur = wbot + MIME_LINELEN_MAX - 8;
483 while (wend > wcur)
484 wend -= 4;
485 goto jnoenc_retry;
486 } else {
487 /* Encoding to encoded word(s); deal with leading whitespace, place
488 * a separator first as necessary: encoded words must always be
489 * separated from text and other encoded words with linear WS.
490 * And if an encoded word was last, intermediate whitespace must
491 * also be encoded, otherwise it would get stripped away! */
492 wcur = n_UNCONST(n_empty);
493 if ((flags & (_ENC_LAST | _SPACE)) != _SPACE) {
494 /* Reinclude whitespace */
495 flags &= ~_SPACE;
496 /* We don't need to place a separator at the very beginning */
497 if (!(flags & _FIRST))
498 wcur = n_UNCONST(" ");
499 } else
500 wcur = wbot++;
502 flags |= a_ANYENC | _ENC_LAST;
503 n_pstate |= n_PS_HEADER_NEEDED_MIME;
505 /* RFC 2047:
506 * An 'encoded-word' may not be more than 75 characters long,
507 * including 'charset', 'encoding', 'encoded-text', and
508 * delimiters. If it is desirable to encode more text than will
509 * fit in an 'encoded-word' of 75 characters, multiple
510 * 'encoded-word's (separated by CRLF SPACE) may be used.
512 * While there is no limit to the length of a multiple-line
513 * header field, each line of a header field that contains one
514 * or more 'encoded-word's is limited to 76 characters */
515 jenc_retry:
516 cin.s = n_UNCONST(wbot);
517 cin.l = PTR2SIZE(wend - wbot);
519 /* C99 */{
520 struct str *xout;
522 if(flags & _ENC_B64)
523 xout = b64_encode(&cout, &cin, B64_ISHEAD | B64_ISENCWORD);
524 else
525 xout = qp_encode(&cout, &cin, QP_ISHEAD | QP_ISENCWORD);
526 if(xout == NULL){
527 sz = -1;
528 break;
530 j = xout->l;
532 /* (Avoid trigraphs in the RFC 2047 placeholder..) */
533 i = j + (flags & _8BIT ? cset8_len : cset7_len) + sizeof("=!!B!!=") -1;
534 if (*wcur != '\0')
535 ++i;
537 jenc_retry_same:
538 /* Unfortunately RFC 2047 explicitly disallows encoded words to be
539 * longer (just like RFC 5322's "a line SHOULD fit in 78 but MAY be
540 * 998 characters long"), so we cannot use the _OVERLONG mechanism,
541 * even though all tested mailers seem to support it */
542 if (i + col <= (/*flags & _OVERLONG ? MIME_LINELEN_MAX :*/ a_MAXCOL)) {
543 if(flags & _MSH_NOTHING){
544 flags &= ~_MSH_NOTHING;
545 putc((msh == a_MIME_SH_COMMENT ? '(' : '"'), fo);
546 ++sz;
547 ++col;
/* "%.1s" prints the separator character wcur points to, if there is one */
549 fprintf(fo, "%.1s=?%s?%c?%.*s?=",
550 wcur, (flags & _8BIT ? cset8 : cset7),
551 (flags & _ENC_B64 ? 'B' : 'Q'),
552 (int)cout.l, cout.s);
553 sz += i;
554 col += i;
555 continue;
558 /* Doesn't fit, try to break the line first */
559 /* TODO I've commented out the _FIRST test since we (1) cannot do
560 * TODO _OVERLONG since (MUAs support but) the standard disallows,
561 * TODO and because of our iconv problem i prefer an empty first line
562 * TODO in favour of a possibly messed up multibytes character. :-( */
563 if (col > 1 /* TODO && !(flags & _FIRST)*/) {
564 putc('\n', fo);
565 sz += 2;
566 col = 1;
567 if (!(flags & _SPACE)) {
568 putc(' ', fo);
569 wcur = n_UNCONST(n_empty);
570 /*flags |= _OVERLONG;*/
571 goto jenc_retry_same;
572 } else {
573 putc((uc_i)*wcur, fo);
574 if (whitechar(*(wcur = wbot)))
575 ++wbot;
576 else {
577 flags &= ~_SPACE;
578 wcur = n_UNCONST(n_empty);
580 /*flags &= ~_OVERLONG;*/
581 goto jenc_retry;
585 /* It is so long that it needs to be broken, effectively causing
586 * artificial data to be inserted (bad standard), yuck */
587 /* todo This is not multibyte safe, as above */
588 /*if (!(flags & _OVERLONG)) { Mechanism explicitly forbidden by 2047
589 flags |= _OVERLONG;
590 goto jenc_retry;
593 /* FIXME n_PSO_UNICODE and parse using UTF-8 sync possibility! */
594 i = PTR2SIZE(wend - wbot) + !!(flags & _SPACE);
595 j = 3 + !(flags & _ENC_B64);
596 for (;;) {
597 wend -= j;
598 i -= j;
599 /* (Note the problem most likely is the transfer-encoding blow,
600 * which is why we test this *after* the decrements.. */
601 if (i <= a_MAXCOL)
602 break;
604 goto jenc_retry;
/* Close the comment / quoted-string if one has been opened.
 * NOTE(review): this is also reached via the "sz = -1; break" error path
 * above, where the ++sz turns -1 into 0 -- confirm whether the error
 * indication is meant to survive */
608 if(!(flags & _MSH_NOTHING) && msh != a_MIME_SH_NONE){
609 putc((msh == a_MIME_SH_COMMENT ? ')' : '"'), fo);
610 ++sz;
611 ++col;
614 if (cout.s != NULL)
615 free(cout.s);
617 if(colp != NULL)
618 *colp = col;
619 NYD_LEAVE;
620 return sz;
/* mime_write_tohdr_a(): write the address header body *in* to *f*.
 * in->s is modified: a NUL is stored at in->s[in->l].
 * If routeaddr() returns a position behind the start of the string, the part
 * before it is handled by a recursive call first.  In the remainder the
 * content of (comments) and "quoted-strings" goes through a_mime__convhdra()
 * with the matching structure hack, everything else is fwrite(3)n verbatim.
 * Returns the byte count in sz, or -1 on error */
623 static ssize_t
624 mime_write_tohdr_a(struct str *in, FILE *f, size_t *colp)
626 struct str xin;
627 size_t i;
628 char const *cp, *lastcp;
629 ssize_t sz, x;
630 NYD_ENTER;
632 in->s[in->l] = '\0';
633 lastcp = in->s;
634 if((cp = routeaddr(in->s)) != NULL && cp > lastcp) {
635 xin.s = in->s;
636 xin.l = PTR2SIZE(cp - in->s);
637 if ((sz = mime_write_tohdr_a(&xin, f, colp)) < 0)
638 goto jleave;
/* The recursive call stored a NUL at this position, put a '<' there
 * (presumably routeaddr() points at the '<' -- confirm) */
639 xin.s[xin.l] = '<';
640 lastcp = cp;
641 } else {
642 cp = in->s;
643 sz = 0;
/* [lastcp, cp) is text which has been seen but not yet written */
646 for( ; *cp != '\0'; ++cp){
647 switch(*cp){
648 case '(':
649 i = PTR2SIZE(cp - lastcp);
650 if(i > 0){
651 if(fwrite(lastcp, 1, i, f) != i)
652 goto jerr;
653 sz += i;
655 lastcp = ++cp;
656 cp = skip_comment(cp);
657 if(cp > lastcp)
658 --cp;
659 /* We want to keep empty comments, too! */
660 xin.s = n_UNCONST(lastcp);
661 xin.l = PTR2SIZE(cp - lastcp);
662 if ((x = a_mime__convhdra(&xin, f, colp, a_MIME_SH_COMMENT)) < 0)
663 goto jerr;
664 sz += x;
/* NOTE(review): if skip_comment() did not advance (a "(" at the very end),
 * cp is at the NUL here and the loop's ++cp steps past it -- confirm that
 * callers only pass validated addresses */
665 lastcp = &cp[1];
666 break;
667 case '"':
668 i = PTR2SIZE(cp - lastcp);
669 if(i > 0){
670 if(fwrite(lastcp, 1, i, f) != i)
671 goto jerr;
672 sz += i;
/* Find the closing quote, skipping over backslash escaped characters */
674 for(lastcp = ++cp; *cp != '\0'; ++cp){
675 if(*cp == '"')
676 break;
677 if(*cp == '\\' && cp[1] != '\0')
678 ++cp;
680 /* We want to keep empty quoted-strings, too! */
681 xin.s = n_UNCONST(lastcp);
682 xin.l = PTR2SIZE(cp - lastcp);
683 if((x = a_mime__convhdra(&xin, f, colp, a_MIME_SH_QUOTE)) < 0)
684 goto jerr;
685 sz += x;
686 ++sz;
/* NOTE(review): for an unterminated quoted-string cp is at the NUL here and
 * the outer loop's ++cp steps past it -- confirm as above */
687 lastcp = &cp[1];
688 break;
/* Write the trailing text */
692 i = PTR2SIZE(cp - lastcp);
693 if(i > 0){
694 if(fwrite(lastcp, 1, i, f) != i)
695 goto jerr;
696 sz += i;
698 jleave:
699 NYD_LEAVE;
700 return sz;
701 jerr:
702 sz = -1;
703 goto jleave;
706 #ifdef HAVE_ICONV
707 static ssize_t
708 a_mime__convhdra(struct str *inp, FILE *fp, size_t *colp,
709 enum a_mime_structure_hack msh){
710 struct str ciconv;
711 ssize_t rv;
712 NYD_ENTER;
714 rv = 0;
715 ciconv.s = NULL;
717 if(inp->l > 0 && iconvd != (iconv_t)-1){
718 ciconv.l = 0;
719 if(n_iconv_str(iconvd, n_ICONV_IGN_NOREVERSE, &ciconv, inp, NULL) != 0){
720 n_iconv_reset(iconvd);
721 goto jleave;
723 *inp = ciconv;
726 rv = mime_write_tohdr(inp, fp, colp, msh);
727 jleave:
728 if(ciconv.s != NULL)
729 free(ciconv.s);
730 NYD_LEAVE;
731 return rv;
733 #endif /* HAVE_ICONV */
735 static void
736 _append_str(char **buf, size_t *sz, size_t *pos, char const *str, size_t len)
738 NYD_ENTER;
739 *buf = srealloc(*buf, *sz += len);
740 memcpy(&(*buf)[*pos], str, len);
741 *pos += len;
742 NYD_LEAVE;
745 static void
746 _append_conv(char **buf, size_t *sz, size_t *pos, char const *str, size_t len)
748 struct str in, out;
749 NYD_ENTER;
751 in.s = n_UNCONST(str);
752 in.l = len;
753 mime_fromhdr(&in, &out, TD_ISPR | TD_ICONV);
754 _append_str(buf, sz, pos, out.s, out.l);
755 free(out.s);
756 NYD_LEAVE;
759 FL bool_t
760 charset_iter_reset(char const *a_charset_to_try_first) /* TODO elim. dups! */
762 char const *sarr[3];
763 size_t sarrl[3], len;
764 char *cp;
765 NYD_ENTER;
766 n_UNUSED(a_charset_to_try_first);
768 #ifdef HAVE_ICONV
769 sarr[2] = ok_vlook(CHARSET_8BIT_OKEY);
771 if(a_charset_to_try_first != NULL && strcmp(a_charset_to_try_first, sarr[2]))
772 sarr[0] = a_charset_to_try_first;
773 else
774 sarr[0] = NULL;
776 if((sarr[1] = ok_vlook(sendcharsets)) == NULL &&
777 ok_blook(sendcharsets_else_ttycharset)){
778 cp = n_UNCONST(ok_vlook(ttycharset));
779 if(strcmp(cp, sarr[2]) && (sarr[0] == NULL || strcmp(cp, sarr[0])))
780 sarr[1] = cp;
782 #else
783 sarr[2] = ok_vlook(ttycharset);
784 #endif
786 sarrl[2] = len = strlen(sarr[2]);
787 #ifdef HAVE_ICONV
788 if ((cp = n_UNCONST(sarr[1])) != NULL)
789 len += (sarrl[1] = strlen(cp));
790 else
791 sarrl[1] = 0;
792 if ((cp = n_UNCONST(sarr[0])) != NULL)
793 len += (sarrl[0] = strlen(cp));
794 else
795 sarrl[0] = 0;
796 #endif
798 _cs_iter_base = cp = salloc(len + 1 + 1 +1);
800 #ifdef HAVE_ICONV
801 if ((len = sarrl[0]) != 0) {
802 memcpy(cp, sarr[0], len);
803 cp[len] = ',';
804 cp += ++len;
806 if ((len = sarrl[1]) != 0) {
807 memcpy(cp, sarr[1], len);
808 cp[len] = ',';
809 cp += ++len;
811 #endif
812 len = sarrl[2];
813 memcpy(cp, sarr[2], len);
814 cp[len] = '\0';
816 _CS_ITER_STEP();
817 NYD_LEAVE;
818 return (_cs_iter != NULL);
821 FL bool_t
822 charset_iter_next(void)
824 bool_t rv;
825 NYD_ENTER;
827 _CS_ITER_STEP();
828 rv = (_cs_iter != NULL);
829 NYD_LEAVE;
830 return rv;
833 FL bool_t
834 charset_iter_is_valid(void)
836 bool_t rv;
837 NYD_ENTER;
839 rv = (_cs_iter != NULL);
840 NYD_LEAVE;
841 return rv;
844 FL char const *
845 charset_iter(void)
847 char const *rv;
848 NYD_ENTER;
850 rv = _cs_iter;
851 NYD_LEAVE;
852 return rv;
855 FL char const *
856 charset_iter_or_fallback(void)
858 char const *rv;
859 NYD_ENTER;
861 rv = _CS_ITER_GET();
862 NYD_LEAVE;
863 return rv;
866 FL void
867 charset_iter_recurse(char *outer_storage[2]) /* TODO LEGACY FUN, REMOVE */
869 NYD_ENTER;
870 outer_storage[0] = _cs_iter_base;
871 outer_storage[1] = _cs_iter;
872 NYD_LEAVE;
875 FL void
876 charset_iter_restore(char *outer_storage[2]) /* TODO LEGACY FUN, REMOVE */
878 NYD_ENTER;
879 _cs_iter_base = outer_storage[0];
880 _cs_iter = outer_storage[1];
881 NYD_LEAVE;
884 #ifdef HAVE_ICONV
885 FL char const *
886 need_hdrconv(struct header *hp) /* TODO once only, then iter */
888 struct n_header_field *hfp;
889 char const *rv;
890 NYD_ENTER;
892 rv = NULL;
894 if((hfp = hp->h_user_headers) != NULL)
895 do if(_has_highbit(hfp->hf_dat + hfp->hf_nl +1))
896 goto jneeds;
897 while((hfp = hfp->hf_next) != NULL);
899 if((hfp = hp->h_custom_headers) != NULL ||
900 (hp->h_custom_headers = hfp = n_customhdr_query()) != NULL)
901 do if(_has_highbit(hfp->hf_dat + hfp->hf_nl +1))
902 goto jneeds;
903 while((hfp = hfp->hf_next) != NULL);
905 if (hp->h_mft != NULL) {
906 if (_name_highbit(hp->h_mft))
907 goto jneeds;
909 if (hp->h_from != NULL) {
910 if (_name_highbit(hp->h_from))
911 goto jneeds;
912 } else if (_has_highbit(myaddrs(NULL)))
913 goto jneeds;
914 if (hp->h_replyto) {
915 if (_name_highbit(hp->h_replyto))
916 goto jneeds;
917 } else if (_has_highbit(ok_vlook(replyto)))
918 goto jneeds;
919 if (hp->h_sender) {
920 if (_name_highbit(hp->h_sender))
921 goto jneeds;
922 } else if (_has_highbit(ok_vlook(sender)))
923 goto jneeds;
925 if (_name_highbit(hp->h_to))
926 goto jneeds;
927 if (_name_highbit(hp->h_cc))
928 goto jneeds;
929 if (_name_highbit(hp->h_bcc))
930 goto jneeds;
931 if (_has_highbit(hp->h_subject))
932 jneeds:
933 rv = _CS_ITER_GET(); /* TODO MIME/send: iter active? iter! else */
934 NYD_LEAVE;
935 return rv;
937 #endif /* HAVE_ICONV */
/* mime_fromhdr(): decode the RFC 2047 encoded words ("=?charset?B|Q?text?=")
 * in *in* and store the result in *out*; out->s is allocated in here.
 * Text that is not a valid encoded word is copied as-is, except that text
 * consisting of blanks only in between two encoded words is dropped.
 * With TD_ICONV a decoded word is converted from its character set to
 * *ttycharset* if these differ and a descriptor can be opened; TD_ISPR
 * passes the final result through makeprint(), TD_DELCTRL through delctrl() */
939 FL void
940 mime_fromhdr(struct str const *in, struct str *out, enum tdflags flags)
942 /* TODO mime_fromhdr(): is called with strings that contain newlines;
943 * TODO this is the usual newline problem all around the codebase;
944 * TODO i.e., if we strip it, then the display misses it ;>
945 * TODO this is why it is so messy and why S-nail v14.2 plus additional
946 * TODO patch for v14.5.2 (and maybe even v14.5.3 subminor) occurred, and
947 * TODO why our display reflects what is contained in the message: the 1:1
948 * TODO relationship of message content and display!
949 * TODO instead a header line should be decoded to what it is (a single
950 * TODO line that is) and it should be objective to the backend whether
951 * TODO it'll be folded to fit onto the display or not, e.g., for search
952 * TODO purposes etc. then the only condition we have to honour in here
953 * TODO is that whitespace in between multiple adjacent MIME encoded words
954 * TODO á la RFC 2047 is discarded; i.e.: this function should deal with
955 * TODO RFC 2047 and be renamed: mime_fromhdr() -> mime_rfc2047_decode() */
956 struct str cin, cout;
957 char *p, *op, *upper;
958 ui32_t convert, lastenc, lastoutl;
959 #ifdef HAVE_ICONV
960 char const *tcs;
961 char *cbeg;
962 iconv_t fhicd = (iconv_t)-1;
963 #endif
964 NYD_ENTER;
/* Empty input results in an empty, but allocated string */
966 out->l = 0;
967 if (in->l == 0) {
968 *(out->s = smalloc(1)) = '\0';
969 goto jleave;
971 out->s = NULL;
973 #ifdef HAVE_ICONV
974 tcs = ok_vlook(ttycharset);
975 #endif
976 p = in->s;
977 upper = p + in->l;
/* lastenc: output length right after the last encoded word (plus any text
 * that has to be kept); lastoutl: output length after the last round */
978 lastenc = lastoutl = 0;
980 while (p < upper) {
981 op = p;
/* Possible start of an encoded word.
 * NOTE(review): p[1] is read even if p is the last byte before upper,
 * presumably relying on a terminating NUL -- confirm */
982 if (*p == '=' && *(p + 1) == '?') {
983 p += 2;
984 #ifdef HAVE_ICONV
985 cbeg = p;
986 #endif
987 while (p < upper && *p != '?')
988 ++p; /* strip charset */
989 if (p >= upper)
990 goto jnotmime;
991 ++p;
992 #ifdef HAVE_ICONV
/* Open a descriptor that converts from the word's character set to
 * ttycharset, unless the two compare equal */
993 if (flags & TD_ICONV) {
994 size_t i = PTR2SIZE(p - cbeg);
995 char *ltag, *cs = ac_alloc(i);
997 memcpy(cs, cbeg, --i);
998 cs[i] = '\0';
999 /* RFC 2231 extends the RFC 2047 character set definition in
1000 * encoded words by language tags - silently strip those off */
1001 if ((ltag = strchr(cs, '*')) != NULL)
1002 *ltag = '\0';
1004 if (fhicd != (iconv_t)-1)
1005 n_iconv_close(fhicd);
1006 fhicd = asccasecmp(cs, tcs) ? n_iconv_open(tcs, cs) : (iconv_t)-1;
1007 ac_free(cs);
1009 #endif
1010 switch (*p) {
1011 case 'B': case 'b':
1012 convert = CONV_FROMB64;
1013 break;
1014 case 'Q': case 'q':
1015 convert = CONV_FROMQP;
1016 break;
1017 default: /* invalid, ignore */
1018 goto jnotmime;
1020 if (*++p != '?')
1021 goto jnotmime;
/* Collect the encoded text up to the closing "?=" */
1022 cin.s = ++p;
1023 cin.l = 1;
1024 for (;;) {
1025 if (PTRCMP(p + 1, >=, upper))
1026 goto jnotmime;
1027 if (*p++ == '?' && *p == '=')
1028 break;
1029 ++cin.l;
1031 ++p;
1032 --cin.l;
1034 cout.s = NULL;
1035 cout.l = 0;
1036 if (convert == CONV_FROMB64) {
1037 if(!b64_decode_header(&cout, &cin))
1038 n_str_assign_cp(&cout, _("[Invalid Base64 encoding]"));
1039 }else if(!qp_decode_header(&cout, &cin))
1040 n_str_assign_cp(&cout, _("[Invalid Quoted-Printable encoding]"));
/* Cut the output back to lastenc, this drops blank-only text which
 * followed the previous encoded word */
1042 out->l = lastenc;
1043 #ifdef HAVE_ICONV
1044 if ((flags & TD_ICONV) && fhicd != (iconv_t)-1) {
1045 cin.s = NULL, cin.l = 0; /* XXX string pool ! */
1046 convert = n_iconv_str(fhicd, n_ICONV_UNIDEFAULT, &cin, &cout, NULL);
1047 out = n_str_add(out, &cin);
1048 if (convert) {/* n_ERR_INVAL at EOS */
1049 n_iconv_reset(fhicd);
1050 out = n_str_add_buf(out, n_qm, 1); /* TODO unicode replacement */
1052 free(cin.s);
1053 } else
1054 #endif
1055 out = n_str_add(out, &cout);
1056 lastenc = lastoutl = out->l;
1057 free(cout.s);
1058 } else
/* Not (valid) MIME: copy up to the next possible encoded word start */
1059 jnotmime: {
1060 bool_t onlyws;
1062 p = op;
1063 onlyws = (lastenc > 0);
1064 for (;;) {
1065 if (++op == upper)
1066 break;
1067 if (op[0] == '=' && (PTRCMP(op + 1, ==, upper) || op[1] == '?'))
1068 break;
1069 if (onlyws && !blankchar(*op))
1070 onlyws = FAL0;
1073 out = n_str_add_buf(out, p, PTR2SIZE(op - p));
1074 p = op;
/* Text other than blanks only has to be kept, so move lastenc behind it */
1075 if (!onlyws || lastoutl != lastenc)
1076 lastenc = out->l;
1077 lastoutl = out->l;
1080 out->s[out->l] = '\0';
1082 if (flags & TD_ISPR) {
1083 makeprint(out, &cout);
1084 free(out->s);
1085 *out = cout;
1087 if (flags & TD_DELCTRL)
1088 out->l = delctrl(out->s, out->l);
1089 #ifdef HAVE_ICONV
1090 if (fhicd != (iconv_t)-1)
1091 n_iconv_close(fhicd);
1092 #endif
1093 jleave:
1094 NYD_LEAVE;
1095 return;
1098 FL char *
1099 mime_fromaddr(char const *name)
1101 char const *cp, *lastcp;
1102 char *res = NULL;
1103 size_t ressz = 1, rescur = 0;
1104 NYD_ENTER;
1106 if (name == NULL)
1107 goto jleave;
1108 if (*name == '\0') {
1109 res = savestr(name);
1110 goto jleave;
1113 if ((cp = routeaddr(name)) != NULL && cp > name) {
1114 _append_conv(&res, &ressz, &rescur, name, PTR2SIZE(cp - name));
1115 lastcp = cp;
1116 } else
1117 cp = lastcp = name;
1119 for ( ; *cp; ++cp) {
1120 switch (*cp) {
1121 case '(':
1122 _append_str(&res, &ressz, &rescur, lastcp, PTR2SIZE(cp - lastcp + 1));
1123 lastcp = ++cp;
1124 cp = skip_comment(cp);
1125 if (--cp > lastcp)
1126 _append_conv(&res, &ressz, &rescur, lastcp, PTR2SIZE(cp - lastcp));
1127 lastcp = cp;
1128 break;
1129 case '"':
1130 while (*cp) {
1131 if (*++cp == '"')
1132 break;
1133 if (*cp == '\\' && cp[1] != '\0')
1134 ++cp;
1136 break;
1139 if (cp > lastcp)
1140 _append_str(&res, &ressz, &rescur, lastcp, PTR2SIZE(cp - lastcp));
1141 /* TODO rescur==0: inserted to silence Coverity ...; check that */
1142 if (rescur == 0)
1143 res = n_UNCONST(n_empty);
1144 else
1145 res[rescur] = '\0';
1146 { char *x = res;
1147 res = savestr(res);
1148 free(x);
1150 jleave:
1151 NYD_LEAVE;
1152 return res;
1155 FL ssize_t
1156 xmime_write(char const *ptr, size_t size, FILE *f, enum conversion convert,
1157 enum tdflags dflags)
1159 ssize_t rv;
1160 struct quoteflt *qf;
1161 NYD_ENTER;
1163 quoteflt_reset(qf = quoteflt_dummy(), f);
1164 rv = mime_write(ptr, size, f, convert, dflags, qf, NULL, NULL);
1165 quoteflt_flush(qf);
1166 NYD_LEAVE;
1167 return rv;
1170 static sigjmp_buf __mimemw_actjmp; /* TODO someday.. */
1171 static int __mimemw_sig; /* TODO someday.. */
1172 static sighandler_type __mimemw_opipe;
1173 static void
1174 __mimemw_onsig(int sig) /* TODO someday, we won't need it no more */
1176 NYD_X; /* Signal handler */
1177 __mimemw_sig = sig;
1178 siglongjmp(__mimemw_actjmp, 1);
1181 FL ssize_t
1182 mime_write(char const *ptr, size_t size, FILE *f,
1183 enum conversion convert, enum tdflags volatile dflags,
1184 struct quoteflt *qf, struct str * volatile outrest,
1185 struct str * volatile inrest)
1187 /* TODO note: after send/MIME layer rewrite we will have a string pool
1188 * TODO so that memory allocation count drops down massively; for now,
1189 * TODO v14.0 that is, we pay a lot & heavily depend on the allocator */
1190 struct str in, out;
1191 ssize_t volatile sz;
1192 NYD_ENTER;
1194 dflags |= _TD_BUFCOPY;
1195 in.s = n_UNCONST(ptr);
1196 in.l = size;
1198 if(inrest != NULL && inrest->l > 0){
1199 out.s = smalloc(inrest->l + size + 1);
1200 memcpy(out.s, inrest->s, inrest->l);
1201 if(size > 0)
1202 memcpy(&out.s[inrest->l], in.s, size);
1203 size += inrest->l;
1204 inrest->l = 0;
1205 (in.s = out.s)[in.l = size] = '\0';
1206 dflags &= ~_TD_BUFCOPY;
1209 out.s = NULL;
1210 out.l = 0;
1212 if ((sz = size) == 0) {
1213 if (outrest != NULL && outrest->l != 0)
1214 goto jconvert;
1215 goto jleave;
1218 #ifdef HAVE_ICONV
1219 if ((dflags & TD_ICONV) && iconvd != (iconv_t)-1 &&
1220 (convert == CONV_TOQP || convert == CONV_8BIT ||
1221 convert == CONV_TOB64 || convert == CONV_TOHDR)) {
1222 if (n_iconv_str(iconvd, n_ICONV_IGN_NOREVERSE, &out, &in, NULL) != 0) {
1223 n_iconv_reset(iconvd);
1224 /* TODO This causes hard-failure. We would need to have an action
1225 * TODO policy FAIL|IGNORE|SETERROR(but continue). Better huh? */
1226 sz = -1;
1227 goto jleave;
1229 in = out;
1230 out.s = NULL;
1231 dflags &= ~_TD_BUFCOPY;
1233 #endif
1235 jconvert:
1236 __mimemw_sig = 0;
1237 __mimemw_opipe = safe_signal(SIGPIPE, &__mimemw_onsig);
1238 if (sigsetjmp(__mimemw_actjmp, 1))
1239 goto jleave;
1241 switch (convert) {
1242 case CONV_FROMQP:
1243 if(!qp_decode_part(&out, &in, outrest, inrest)){
1244 n_err(_("Invalid Quoted-Printable encoding ignored\n"));
1245 sz = 0; /* TODO sz = -1 stops outer levels! */
1246 break;
1248 goto jqpb64_dec;
1249 case CONV_TOQP:
1250 if(qp_encode(&out, &in, QP_NONE) == NULL){
1251 sz = 0; /* TODO sz = -1 stops outer levels! */
1252 break;
1254 goto jqpb64_enc;
1255 case CONV_8BIT:
1256 sz = quoteflt_push(qf, in.s, in.l);
1257 break;
1258 case CONV_FROMB64:
1259 if(!b64_decode_part(&out, &in, outrest, inrest))
1260 goto jeb64;
1261 outrest = NULL;
1262 if(0){
1263 /* FALLTHRU */
1264 case CONV_FROMB64_T:
1265 if(!b64_decode_part(&out, &in, outrest, inrest)){
1266 jeb64:
1267 n_err(_("Invalid Base64 encoding ignored\n"));
1268 sz = 0; /* TODO sz = -1 stops outer levels! */
1269 break;
1272 jqpb64_dec:
1273 if ((sz = out.l) != 0) {
1274 ui32_t opl = qf->qf_pfix_len;
1275 sz = _fwrite_td(&out, (dflags & ~_TD_BUFCOPY), outrest, qf);
1276 qf->qf_pfix_len = opl;
1278 break;
1279 case CONV_TOB64:
1280 if(b64_encode(&out, &in, B64_LF | B64_MULTILINE) == NULL){
1281 sz = -1;
1282 break;
1284 jqpb64_enc:
1285 sz = fwrite(out.s, sizeof *out.s, out.l, f);
1286 if (sz != (ssize_t)out.l)
1287 sz = -1;
1288 break;
1289 case CONV_FROMHDR:
1290 mime_fromhdr(&in, &out, TD_ISPR | TD_ICONV | (dflags & TD_DELCTRL));
1291 sz = quoteflt_push(qf, out.s, out.l);
1292 break;
1293 case CONV_TOHDR:
1294 sz = mime_write_tohdr(&in, f, NULL, a_MIME_SH_NONE);
1295 break;
1296 case CONV_TOHDR_A:{
1297 size_t col;
1299 if(dflags & _TD_BUFCOPY){
1300 n_str_dup(&out, &in);
1301 in = out;
1302 out.s = NULL;
1303 dflags &= ~_TD_BUFCOPY;
1305 col = 0;
1306 sz = mime_write_tohdr_a(&in, f, &col);
1307 } break;
1308 default:
1309 sz = _fwrite_td(&in, dflags, NULL, qf);
1310 break;
1312 jleave:
1313 if (out.s != NULL)
1314 free(out.s);
1315 if (in.s != ptr)
1316 free(in.s);
1317 safe_signal(SIGPIPE, __mimemw_opipe);
1318 if (__mimemw_sig != 0)
1319 n_raise(__mimemw_sig);
1320 NYD_LEAVE;
1321 return sz;
1324 /* s-it-mode */