Tweak *rfc822-show-all* manual text
[s-mailx.git] / mime.c
blobea7d69650a92a51723304176144b5d152b9932a6
1 /*
2 * S-nail - a mail user agent derived from Berkeley Mail.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 Steffen "Daode" Nurpmeso.
6 */
7 /*
8 * Copyright (c) 2000
9 * Gunnar Ritter. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by Gunnar Ritter
22 * and his contributors.
23 * 4. Neither the name of Gunnar Ritter nor the names of his contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY GUNNAR RITTER AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL GUNNAR RITTER OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
40 #include "rcv.h"
41 #include "extern.h"
42 #include <ctype.h>
43 #include <errno.h>
44 #ifdef HAVE_WCTYPE_H
45 #include <wctype.h>
46 #endif /* HAVE_WCTYPE_H */
49 * Mail -- a mail program
51 * MIME support functions.
55 * You won't guess what these are for.
/* Digit alphabet; ctohex() indexes it to produce hexadecimal characters. */
57 static const char basetable[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* Extension-to-type table that mime_filecontent() consults second. */
58 static char *mimetypes_world = "/etc/mime.types";
/* Table that mime_filecontent() passes through expand() and consults first. */
59 static char *mimetypes_user = "~/.mime.types";
/* Charset name used when the "charset7" variable is unset. */
60 char *us_ascii = "us-ascii";
62 static int mustquote_body(int c);
63 static int mustquote_hdr(const char *cp, int wordstart, int wordend);
64 static int mustquote_inhdrq(int c);
65 static size_t delctrl(char *cp, size_t sz);
66 static char *getcharset(int isclean);
67 static int has_highbit(register const char *s);
68 #ifdef HAVE_ICONV
69 static void uppercopy(char *dest, const char *src);
70 static void stripdash(char *p);
71 static void invalid_seq(int c);
72 #endif /* HAVE_ICONV */
73 static int is_this_enc(const char *line, const char *encoding);
74 static char *mime_tline(char *x, char *l);
75 static char *mime_type(char *ext, char *filename);
76 static enum mimeclean mime_isclean(FILE *f);
77 static enum conversion gettextconversion(void);
78 static char *ctohex(int c, char *hex);
79 static size_t mime_write_toqp(struct str *in, FILE *fo, int (*mustquote)(int));
80 static void mime_str_toqp(struct str *in, struct str *out,
81 int (*mustquote)(int), int inhdr);
82 static void mime_fromqp(struct str *in, struct str *out, int ishdr);
83 static size_t mime_write_tohdr(struct str *in, FILE *fo);
84 static size_t convhdra(char *str, size_t len, FILE *fp);
85 static size_t mime_write_tohdr_a(struct str *in, FILE *f);
86 static void addstr(char **buf, size_t *sz, size_t *pos, char *str, size_t len);
87 static void addconv(char **buf, size_t *sz, size_t *pos, char *str, size_t len);
88 static size_t fwrite_td(void *ptr, size_t size, size_t nmemb, FILE *f,
89 enum tdflags flags, char *prefix, size_t prefixlen);
/*
 * Tell whether byte c has to be escaped inside a message body: newline is
 * always passed through; anything below 040, '=' itself, and anything from
 * 0177 upwards must be quoted.
 */
static int
mustquote_body(int c)
{
	if (c == '\n')
		return 0;
	return (c < 040 || c >= 0177 || c == '=') ? 1 : 0;
}
/*
 * Tell whether the byte at cp has to be escaped inside a message header.
 * Besides bytes outside the printable range this covers "=?" at the start
 * of a word and "?=" at the end of one (or followed by whitespace / end of
 * string), since both could be mistaken for encoded-word delimiters.
 */
static int
mustquote_hdr(const char *cp, int wordstart, int wordend)
{
	int ch = *cp & 0377;
	int unprintable = (ch < 040 || ch >= 0177);

	if (unprintable && ch != '\n')
		return 1;
	if (wordstart && cp[0] == '=' && cp[1] == '?')
		return 1;
	if (cp[0] != '?' || cp[1] != '=')
		return 0;
	return (wordend || cp[2] == '\0' || whitechar(cp[2]&0377)) ? 1 : 0;
}
/*
 * Tell whether byte c has to be escaped inside a quoted ("Q" encoded) part
 * of a header.  Newline is never quoted; '=', '?' and '_' always are, as is
 * everything up to and including space and everything from 0177 upwards.
 */
static int
mustquote_inhdrq(int c)
{
	switch (c) {
	case '\n':
		return 0;
	case '=':
	case '?':
	case '_':
		return 1;
	default:
		return (c <= 040 || c >= 0177) ? 1 : 0;
	}
}
/*
 * Remove, in place, every byte of the sz-byte buffer cp for which
 * cntrlchar() is true.  Returns the number of bytes kept; the buffer is not
 * terminated here.
 */
static size_t
delctrl(char *cp, size_t sz)
{
	char *src, *dst = cp;
	char *end = cp + sz;

	for (src = cp; src < end; src++)
		if (!cntrlchar(*src&0377))
			*dst++ = *src;
	return (size_t)(dst - cp);
}
145 static char defcharset[] = "utf-8";
148 * Get the character set dependant on the conversion.
150 static char *
151 getcharset(int isclean)
153 char *charset;
155 if (isclean & (MIME_CTRLCHAR|MIME_HASNUL))
156 charset = NULL;
157 else if (isclean & MIME_HIGHBIT) {
158 charset = (wantcharset && wantcharset != (char *)-1) ?
159 wantcharset : value("charset");
160 if (charset == NULL) {
161 charset = defcharset;
163 } else {
165 * This variable shall remain undocumented because
166 * only experts should change it.
168 charset = value("charset7");
169 if (charset == NULL) {
170 charset = us_ascii;
173 return charset;
177 * Get the setting of the terminal's character set.
179 char *
180 gettcharset(void)
182 char *t;
184 if ((t = value("ttycharset")) == NULL)
185 if ((t = value("charset")) == NULL)
186 t = defcharset;
187 return t;
/*
 * Return 1 if the NUL-terminated string s contains a byte with the eighth
 * bit set, 0 otherwise (including for a NULL pointer).
 */
static int
has_highbit(const char *s)
{
	if (s == NULL)
		return 0;
	for (;; s++) {
		if (*s & 0200)
			return 1;
		if (*s == '\0')
			return 0;
	}
}
202 static int
203 name_highbit(struct name *np)
205 while (np) {
206 if (has_highbit(np->n_name) || has_highbit(np->n_fullname))
207 return 1;
208 np = np->n_flink;
210 return 0;
213 char *
214 need_hdrconv(struct header *hp, enum gfield w)
216 if (w & GIDENT) {
217 if (hp->h_from) {
218 if (name_highbit(hp->h_from))
219 goto needs;
220 } else if (has_highbit(myaddrs(hp)))
221 goto needs;
222 if (hp->h_organization) {
223 if (has_highbit(hp->h_organization))
224 goto needs;
225 } else if (has_highbit(value("ORGANIZATION")))
226 goto needs;
227 if (hp->h_replyto) {
228 if (name_highbit(hp->h_replyto))
229 goto needs;
230 } else if (has_highbit(value("replyto")))
231 goto needs;
232 if (hp->h_sender) {
233 if (name_highbit(hp->h_sender))
234 goto needs;
235 } else if (has_highbit(value("sender")))
236 goto needs;
238 if (w & GTO && name_highbit(hp->h_to))
239 goto needs;
240 if (w & GCC && name_highbit(hp->h_cc))
241 goto needs;
242 if (w & GBCC && name_highbit(hp->h_bcc))
243 goto needs;
244 if (w & GSUBJECT && has_highbit(hp->h_subject))
245 goto needs;
246 return NULL;
247 needs: return getcharset(MIME_HIGHBIT);
250 #ifdef HAVE_ICONV
/*
 * Copy src to dest, passing every byte (the terminating NUL included)
 * through upperconv().  dest must have room for strlen(src) + 1 bytes.
 */
static void
uppercopy(char *dest, const char *src)
{
	for (;;) {
		*dest++ = upperconv(*src & 0377);
		if (*src++ == '\0')
			break;
	}
}
/*
 * Strip dashes: remove every '-' from the NUL-terminated string p, in
 * place.
 *
 * The previous implementation wrote "*(q = p)", which merely re-seated the
 * write pointer on the read pointer and never stored anything, so the
 * string was left unchanged.  Each byte (the terminator included) is now
 * copied to the write position, which only advances for non-dash bytes.
 */
static void
stripdash(char *p)
{
	char *q = p;

	do {
		if ((*q = *p) != '-')
			q++;
	} while (*p++ != '\0');
}
/*
 * Skip a leading "iso-" or "iso" (compared with ascncasecmp()) of a
 * charset name.
 */
static const char *
iso_prefix_skip(const char *code)
{
	if (ascncasecmp(code, "iso-", 4) == 0)
		return code + 4;
	if (ascncasecmp(code, "iso", 3) == 0)
		return code + 3;
	return code;
}

/*
 * An iconv_open wrapper that tries to convert between character set
 * naming conventions.  The names are tried as given, then without an
 * "iso-"/"iso" prefix, then upper-cased, then passed through stripdash().
 * Returns the first descriptor that opens, or (iconv_t)-1.
 */
iconv_t
iconv_open_ft(const char *tocode, const char *fromcode)
{
	iconv_t cd;
	char *to_uc, *from_uc;

	/*
	 * On Linux systems, this call may succeed.
	 */
	cd = iconv_open(tocode, fromcode);
	if (cd != (iconv_t)-1)
		return cd;
	/*
	 * Remove the "iso-" prefixes for Solaris.
	 */
	tocode = iso_prefix_skip(tocode);
	fromcode = iso_prefix_skip(fromcode);
	if (*tocode == '\0' || *fromcode == '\0')
		return (iconv_t) -1;
	cd = iconv_open(tocode, fromcode);
	if (cd != (iconv_t)-1)
		return cd;
	/*
	 * Solaris prefers upper-case charset names. Don't ask...
	 */
	to_uc = salloc(strlen(tocode) + 1);
	uppercopy(to_uc, tocode);
	from_uc = salloc(strlen(fromcode) + 1);
	uppercopy(from_uc, fromcode);
	cd = iconv_open(to_uc, from_uc);
	if (cd != (iconv_t)-1)
		return cd;
	/*
	 * Strip dashes for UnixWare.
	 */
	stripdash(to_uc);
	stripdash(from_uc);
	cd = iconv_open(to_uc, from_uc);
	if (cd != (iconv_t)-1)
		return cd;
	/*
	 * Add your vendor's sillinesses here.
	 */

	/*
	 * If the encoding names are equal at this point, they
	 * are just not understood by iconv(), and we cannot
	 * sensibly use it in any way. We do not perform this
	 * as an optimization above since iconv() can otherwise
	 * be used to check the validity of the input even with
	 * identical encoding names.
	 */
	if (strcmp(to_uc, from_uc) == 0)
		errno = 0;
	return (iconv_t)-1;
}
/*
 * Fault-tolerant iconv() function.
 * (2012-09-24: exported and used exclusively so that prototype problems
 * (*inb* is 'const char**' except in POSIX) are isolated in a single place.
 * GNU libiconv even allows for configuration time const/non-const..
 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
 * support compiler invocations which bail on error, so no -Werror.)
 *
 * With tolerant set, an EILSEQ or EINVAL failure skips one input byte,
 * emits '?' in its place and retries.  If either buffer is exhausted the
 * output is terminated with NUL and the loop stops.  Returns the result of
 * the last iconv() call.
 */
/* Citrus project? */
#if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
# define __INBCAST	(const char**)
#endif
#ifndef __INBCAST
# define __INBCAST
#endif

size_t
iconv_ft(iconv_t cd, char **inb, size_t *inbleft,
		char **outb, size_t *outbleft, int tolerant)
{
	size_t sz;

	for (;;) {
		sz = iconv(cd, __INBCAST inb, inbleft, outb, outbleft);
		if (sz != (size_t)-1 || !tolerant)
			break;
		if (errno != EILSEQ && errno != EINVAL)
			break;
		/* Drop the offending input byte, if there is one left. */
		if (*inbleft == 0) {
			**outb = '\0';
			break;
		}
		++*inb;
		--*inbleft;
		/* Replace it by a question mark, if there is room. */
		if (*outbleft == 0) {
			**outb = '\0';
			break;
		}
		*(*outb)++ = '?';
		--*outbleft;
	}
	return sz;
}
#undef __INBCAST
/*
 * Print an error because of an invalid character sequence.
 * The message itself is currently disabled, so c is ignored.
 */
/*ARGSUSED*/
static void
invalid_seq(int c)
{
	/*fprintf(stderr, "iconv: cannot convert %c\n", c);*/
	(void)c;
}
392 #endif /* HAVE_ICONV */
/*
 * Check whether the header value line names the given encoding.
 * encoding must be lower-case; line is compared through lowerconv() and
 * may be enclosed in double quotes.  After the name, line must end, be
 * followed by whitespace, or (if it began with a quote) by the closing
 * quote.  Returns 1 on a match, 0 otherwise.
 *
 * The comparison loop stops when either string ends.  Formerly a line
 * that ran out first was still accepted, so a mere prefix such as "b" (or
 * an empty value) matched; the whole encoding name must now have been
 * consumed.
 */
static int
is_this_enc(const char *line, const char *encoding)
{
	int quoted = 0, c;

	if (*line == '"') {
		quoted = 1;
		line++;
	}
	while (*line != '\0' && *encoding != '\0') {
		c = *line++;
		if (lowerconv(c) != *encoding++)
			return 0;
	}
	/* line ended before the complete encoding name was seen */
	if (*encoding != '\0')
		return 0;
	if (quoted && *line == '"')
		return 1;
	if (*line == '\0' || whitechar(*line & 0377))
		return 1;
	return 0;
}
412 * Get the mime encoding from a Content-Transfer-Encoding header field.
414 enum mimeenc
415 mime_getenc(char *p)
417 if (is_this_enc(p, "7bit"))
418 return MIME_7B;
419 if (is_this_enc(p, "8bit"))
420 return MIME_8B;
421 if (is_this_enc(p, "base64"))
422 return MIME_B64;
423 if (is_this_enc(p, "binary"))
424 return MIME_BIN;
425 if (is_this_enc(p, "quoted-printable"))
426 return MIME_QP;
427 return MIME_NONE;
431 * Get the mime content from a Content-Type header field, other parameters
432 * already stripped.
434 int
435 mime_getcontent(char *s)
437 if (strchr(s, '/') == NULL) /* for compatibility with non-MIME */
438 return MIME_TEXT;
439 if (asccasecmp(s, "text/plain") == 0)
440 return MIME_TEXT_PLAIN;
441 if (asccasecmp(s, "text/html") == 0)
442 return MIME_TEXT_HTML;
443 if (ascncasecmp(s, "text/", 5) == 0)
444 return MIME_TEXT;
445 if (asccasecmp(s, "message/rfc822") == 0)
446 return MIME_822;
447 if (ascncasecmp(s, "message/", 8) == 0)
448 return MIME_MESSAGE;
449 if (asccasecmp(s, "multipart/alternative") == 0)
450 return MIME_ALTERNATIVE;
451 if (asccasecmp(s, "multipart/digest") == 0)
452 return MIME_DIGEST;
453 if (ascncasecmp(s, "multipart/", 10) == 0)
454 return MIME_MULTI;
455 if (asccasecmp(s, "application/x-pkcs7-mime") == 0 ||
456 asccasecmp(s, "application/pkcs7-mime") == 0)
457 return MIME_PKCS7;
458 return MIME_UNKNOWN;
/*
 * Get a mime style parameter from a header line.
 *
 * param is the parameter name (matched with ascncasecmp()), h the header
 * body.  Unless h starts with whitespace, the leading value up to the
 * first unescaped ';' is skipped.  Returns a salloc()ed copy of the
 * parameter value (without surrounding double quotes), or NULL if the
 * parameter is absent or its quoted value is unterminated.
 *
 * Two places used to step over the terminating NUL and keep scanning
 * behind the string: after an unterminated quoted string in a foreign
 * parameter, and after a matching parameter name that ends the string
 * without '='.  The pointer is now advanced past a closing quote or a '='
 * only if that character is really there.  As a consequence a bare
 * "name;" no longer swallows the parameter following it.
 */
char *
mime_getparam(char *param, char *h)
{
	char *p = h, *q, *r;
	int c;
	size_t sz;

	sz = strlen(param);
	if (!whitechar(*p & 0377)) {
		/* skip the main value, honouring backslash escapes */
		c = '\0';
		while (*p && (*p != ';' || c == '\\')) {
			c = c == '\\' ? '\0' : *p;
			p++;
		}
		if (*p++ == '\0')
			return NULL;
	}
	for (;;) {
		while (whitechar(*p & 0377))
			p++;
		if (ascncasecmp(p, param, sz) == 0) {
			p += sz;
			while (whitechar(*p & 0377))
				p++;
			if (*p == '=') {
				p++;
				break;
			}
		}
		/* not ours: skip to the next unescaped, unquoted ';' */
		c = '\0';
		while (*p && (*p != ';' || c == '\\')) {
			if (*p == '"' && c != '\\') {
				p++;
				while (*p && (*p != '"' || c == '\\')) {
					c = c == '\\' ? '\0' : *p;
					p++;
				}
				/* only step over a closing quote that exists */
				if (*p != '\0')
					p++;
			} else {
				c = c == '\\' ? '\0' : *p;
				p++;
			}
		}
		if (*p++ == '\0')
			return NULL;
	}
	while (whitechar(*p & 0377))
		p++;
	if (*p == '"') {
		p++;
		if ((q = strchr(p, '"')) == NULL)
			return NULL;
	} else {
		q = p;
		while (*q && !whitechar(*q & 0377) && *q != ';')
			q++;
	}
	sz = q - p;
	r = salloc(sz + 1);
	memcpy(r, p, sz);
	r[sz] = '\0';
	return r;
}
/*
 * Get the boundary out of a Content-Type: multipart/xyz header field.
 * Returns a salloc()ed string consisting of "--" followed by the value of
 * the "boundary" parameter, or NULL if there is no such parameter.
 */
char *
mime_getboundary(char *h)
{
	char *bnd, *res;
	size_t len;

	bnd = mime_getparam("boundary", h);
	if (bnd == NULL)
		return NULL;
	len = strlen(bnd);
	res = salloc(len + 3);
	res[0] = '-';
	res[1] = '-';
	memcpy(&res[2], bnd, len);
	res[len + 2] = '\0';
	return res;
}
/*
 * Get a line like "text/html html" and look if x matches the extension.
 * The line l is split in place: the type is the first blank-delimited
 * token, the extensions follow separated by whitespace.  Lines that do not
 * start with an ASCII letter are ignored.  Returns a salloc()ed copy of
 * the type if one extension equals x (strcmp()), else NULL.
 */
static char *
mime_tline(char *x, char *l)
{
	char *type, *ext, *res;

	if ((*l & 0200) || alphachar(*l & 0377) == 0)
		return NULL;
	type = l;
	while (*l != '\0' && blankchar(*l & 0377) == 0)
		l++;
	if (*l == '\0')
		return NULL;
	*l++ = '\0';
	while (*l != '\0' && blankchar(*l & 0377) != 0)
		l++;
	if (*l == '\0')
		return NULL;
	while (*l != '\0') {
		ext = l;
		while (*l != '\0' && whitechar(*l & 0377) == 0)
			l++;
		if (*l != '\0')
			*l++ = '\0';
		if (strcmp(x, ext) == 0) {
			res = salloc(strlen(type) + 1);
			strcpy(res, type);
			return res;
		}
		while (*l != '\0' && whitechar(*l & 0377) != 0)
			l++;
	}
	return NULL;
}
/*
 * Check the given MIME type file for extension ext.
 * Reads filename line by line and hands each line to mime_tline();
 * returns the first type found, or NULL if the file cannot be opened or
 * has no matching line.
 */
static char *
mime_type(char *ext, char *filename)
{
	FILE *fp;
	char *buf = NULL, *found = NULL;
	size_t bufsize = 0;

	fp = Fopen(filename, "r");
	if (fp == NULL)
		return NULL;
	while (found == NULL && fgetline(&buf, &bufsize, NULL, NULL, fp, 0))
		found = mime_tline(ext, buf);
	Fclose(fp);
	if (buf != NULL)
		free(buf);
	return found;
}
613 * Return the Content-Type matching the extension of name.
615 char *
616 mime_filecontent(char *name)
618 char *ext, *content;
620 if ((ext = strrchr(name, '.')) == NULL || *++ext == '\0')
621 return NULL;
622 if ((content = mime_type(ext, expand(mimetypes_user))) != NULL)
623 return content;
624 if ((content = mime_type(ext, mimetypes_world)) != NULL)
625 return content;
626 return NULL;
630 * Check file contents.
632 static enum mimeclean
633 mime_isclean(FILE *f)
635 long initial_pos;
636 unsigned curlen = 1, maxlen = 0, limit = 950;
637 enum mimeclean isclean = 0;
638 char *cp;
639 int c = EOF, lastc;
641 initial_pos = ftell(f);
642 do {
643 lastc = c;
644 c = getc(f);
645 curlen++;
646 if (c == '\n' || c == EOF) {
648 * RFC 821 imposes a maximum line length of 1000
649 * characters including the terminating CRLF
650 * sequence. The configurable limit must not
651 * exceed that including a safety zone.
653 if (curlen > maxlen)
654 maxlen = curlen;
655 curlen = 1;
656 } else if (c & 0200) {
657 isclean |= MIME_HIGHBIT;
658 } else if (c == '\0') {
659 isclean |= MIME_HASNUL;
660 break;
661 } else if ((c < 040 && (c != '\t' && c != '\f')) || c == 0177) {
662 isclean |= MIME_CTRLCHAR;
664 } while (c != EOF);
665 if (lastc != '\n')
666 isclean |= MIME_NOTERMNL;
667 clearerr(f);
668 fseek(f, initial_pos, SEEK_SET);
669 if ((cp = value("maximum-unencoded-line-length")) != NULL)
670 limit = (unsigned)atoi(cp);
671 if (limit > 950)
672 limit = 950;
673 if (maxlen > limit)
674 isclean |= MIME_LONGLINES;
675 return isclean;
679 * Get the conversion that matches the encoding specified in the environment.
681 static enum conversion
682 gettextconversion(void)
684 char *p;
685 int convert;
687 if ((p = value("encoding")) == NULL)
688 return CONV_8BIT;
689 if (strcmp(p, "quoted-printable") == 0)
690 convert = CONV_TOQP;
691 else if (strcmp(p, "8bit") == 0)
692 convert = CONV_8BIT;
693 else {
694 fprintf(stderr, tr(177,
695 "Warning: invalid encoding %s, using 8bit\n"), p);
696 convert = CONV_8BIT;
698 return convert;
701 /*TODO Dobson: be037047c, contenttype==NULL||"text"==NULL control flow! */
703 get_mime_convert(FILE *fp, char **contenttype, char **charset,
704 enum mimeclean *isclean, int dosign)
706 int convert;
708 *isclean = mime_isclean(fp);
709 if (*isclean & MIME_HASNUL ||
710 (*contenttype &&
711 ascncasecmp(*contenttype, "text/", 5))) {
712 convert = CONV_TOB64;
713 if (*contenttype == NULL ||
714 ascncasecmp(*contenttype, "text/", 5) == 0)
715 *contenttype = "application/octet-stream";
716 *charset = NULL;
717 } else if (*isclean & (MIME_LONGLINES|MIME_CTRLCHAR|MIME_NOTERMNL) ||
718 dosign)
719 convert = CONV_TOQP;
720 else if (*isclean & MIME_HIGHBIT)
721 convert = gettextconversion();
722 else
723 convert = CONV_7BIT;
724 if (*contenttype == NULL ||
725 ascncasecmp(*contenttype, "text/", 5) == 0) {
726 *charset = getcharset(*isclean);
727 if (wantcharset == (char *)-1) {
728 *contenttype = "application/octet-stream";
729 *charset = NULL;
730 } if (*isclean & MIME_CTRLCHAR) {
731 convert = CONV_TOB64;
733 * RFC 2046 forbids control characters other than
734 * ^I or ^L in text/plain bodies. However, some
735 * obscure character sets actually contain these
736 * characters, so the content type can be set.
738 if ((*contenttype = value("contenttype-cntrl")) == NULL)
739 *contenttype = "application/octet-stream";
740 } else if (*contenttype == NULL)
741 *contenttype = "text/plain";
743 return convert;
747 * Convert c to a hexadecimal character string and store it in hex.
749 static char *
750 ctohex(int c, char *hex)
752 unsigned char d;
754 hex[2] = '\0';
755 d = c % 16;
756 hex[1] = basetable[d];
757 if (c > d)
758 hex[0] = basetable[(c - d) / 16];
759 else
760 hex[0] = basetable[0];
761 return hex;
765 * Write to a file converting to quoted-printable.
766 * The mustquote function determines whether a character must be quoted.
768 static size_t
769 mime_write_toqp(struct str *in, FILE *fo, int (*mustquote)(int))
771 char *p, *upper, *h, hex[3];
772 int l;
773 size_t sz;
775 sz = in->l;
776 upper = in->s + in->l;
777 for (p = in->s, l = 0; p < upper; p++) {
778 if (mustquote(*p) ||
779 (p < upper - 1 && p[1] == '\n' &&
780 blankchar(*p)) ||
781 (p < upper - 4 && l == 0 &&
782 *p == 'F' && p[1] == 'r' &&
783 p[2] == 'o' && p[3] == 'm') ||
784 (*p == '.' && l == 0 && p < upper - 1 &&
785 p[1] == '\n')) {
786 if (l >= 69) {
787 sz += 2;
788 fwrite("=\n", sizeof (char), 2, fo);
789 l = 0;
791 sz += 2;
792 putc('=', fo);
793 h = ctohex(*p, hex);
794 fwrite(h, sizeof *h, 2, fo);
795 l += 3;
796 } else {
797 if (*p == '\n')
798 l = 0;
799 else if (l >= 71) {
800 sz += 2;
801 fwrite("=\n", sizeof (char), 2, fo);
802 l = 0;
804 putc(*p, fo);
805 l++;
808 return sz;
812 * Write to a stringstruct converting to quoted-printable.
813 * The mustquote function determines whether a character must be quoted.
815 static void
816 mime_str_toqp(struct str *in, struct str *out, int (*mustquote)(int), int inhdr)
818 char *p, *q, *upper;
820 out->s = smalloc(in->l * 3 + 1);
821 q = out->s;
822 out->l = in->l;
823 upper = in->s + in->l;
824 for (p = in->s; p < upper; p++) {
825 if (mustquote(*p&0377) || (p+1 < upper && *(p + 1) == '\n' &&
826 blankchar(*p & 0377))) {
827 if (inhdr && *p == ' ') {
828 *q++ = '_';
829 } else {
830 out->l += 2;
831 *q++ = '=';
832 ctohex(*p&0377, q);
833 q += 2;
835 } else {
836 *q++ = *p;
839 *q = '\0';
843 * Write to a stringstruct converting from quoted-printable.
845 static void
846 mime_fromqp(struct str *in, struct str *out, int ishdr)
848 char *p, *q, *upper;
849 char quote[4];
851 out->l = in->l;
852 out->s = smalloc(out->l + 1);
853 upper = in->s + in->l;
854 for (p = in->s, q = out->s; p < upper; p++) {
855 if (*p == '=') {
856 do {
857 p++;
858 out->l--;
859 } while (blankchar(*p & 0377) && p < upper);
860 if (p == upper)
861 break;
862 if (*p == '\n') {
863 out->l--;
864 continue;
866 if (p + 1 >= upper)
867 break;
868 quote[0] = *p++;
869 quote[1] = *p;
870 quote[2] = '\0';
871 *q = (char)strtol(quote, NULL, 16);
872 q++;
873 out->l--;
874 } else if (ishdr && *p == '_')
875 *q++ = ' ';
876 else
877 *q++ = *p;
879 return;
/*
 * Helper for mime_fromhdr(): enlarge out->s by inc bytes (keeping one extra
 * byte beyond maxstor) and re-point q at the same offset inside the
 * reallocated buffer.  Uses the locals out, q and maxstor of its caller.
 */
882 #define mime_fromhdr_inc(inc) { \
883 size_t diff = q - out->s; \
884 out->s = srealloc(out->s, (maxstor += inc) + 1); \
885 q = &(out->s)[diff]; \
888 * Convert header fields from RFC 1522 format
/*
 * Copies in to the newly allocated out, replacing every encoded word of the
 * form "=?charset?B?text?=" or "=?charset?Q?text?=" by its decoded content.
 * Anything that does not parse as an encoded word is copied literally.
 * flags: with TD_ICONV (and HAVE_ICONV) decoded text is converted from the
 * word's charset to the one from gettcharset() unless the names are equal;
 * TD_ISPR runs the result through makeprint(); TD_DELCTRL runs it through
 * delctrl().  The caller owns out->s afterwards.
 */
890 void
891 mime_fromhdr(struct str *in, struct str *out, enum tdflags flags)
893 char *p, *q, *op, *upper, *cs, *cbeg, *tcs, *lastwordend = NULL;
894 struct str cin, cout;
895 int convert;
896 size_t maxstor, lastoutl = 0;
897 #ifdef HAVE_ICONV
898 iconv_t fhicd = (iconv_t)-1;
899 #endif
901 tcs = gettcharset();
902 maxstor = in->l;
903 out->s = smalloc(maxstor + 1);
904 out->l = 0;
905 upper = in->s + in->l;
906 for (p = in->s, q = out->s; p < upper; p++) {
/* op remembers the start so that "notmime" can fall back to a literal copy */
907 op = p;
/* NOTE(review): p[1] is read without a p + 1 < upper check; presumably in->s is NUL-terminated -- confirm */
908 if (*p == '=' && *(p + 1) == '?') {
909 p += 2;
910 cbeg = p;
911 while (p < upper && *p != '?')
912 p++; /* strip charset */
913 if (p >= upper)
914 goto notmime;
/* cs receives a NUL-terminated copy of the charset name */
915 cs = salloc(++p - cbeg);
916 memcpy(cs, cbeg, p - cbeg - 1);
917 cs[p - cbeg - 1] = '\0';
918 #ifdef HAVE_ICONV
/* one descriptor is kept at a time; it is reopened for every encoded word */
919 if (fhicd != (iconv_t)-1)
920 iconv_close(fhicd);
921 if (strcmp(cs, tcs))
922 fhicd = iconv_open_ft(tcs, cs);
923 else
924 fhicd = (iconv_t)-1;
925 #endif
/* the encoding letter selects base64 or quoted-printable decoding */
926 switch (*p) {
927 case 'B': case 'b':
928 convert = CONV_FROMB64;
929 break;
930 case 'Q': case 'q':
931 convert = CONV_FROMQP;
932 break;
933 default: /* invalid, ignore */
934 goto notmime;
936 if (*++p != '?')
937 goto notmime;
/* measure the encoded text up to the closing "?=" */
938 cin.s = ++p;
939 cin.l = 1;
940 for (;;) {
941 if (p == upper)
942 goto fromhdr_end;
943 if (*p++ == '?' && *p == '=')
944 break;
945 cin.l++;
947 cin.l--;
948 switch (convert) {
949 case CONV_FROMB64:
950 mime_fromb64(&cin, &cout, 1);
951 break;
952 case CONV_FROMQP:
953 mime_fromqp(&cin, &cout, 1);
954 break;
/*
 * If only blanks were copied since the previous encoded word, rewind the
 * output to the end of that word, dropping the blanks in between.
 */
956 if (lastwordend) {
957 q = lastwordend;
958 out->l = lastoutl;
960 #ifdef HAVE_ICONV
961 if ((flags & TD_ICONV) && fhicd != (iconv_t)-1) {
962 char *iptr, *mptr, *nptr, *uptr;
963 size_t inleft, outleft;
/* convert straight into the output buffer; on E2BIG grow it and start over */
965 again: inleft = cout.l;
966 outleft = maxstor - out->l;
967 mptr = nptr = q;
968 uptr = nptr + outleft;
969 iptr = cout.s;
970 if (iconv_ft(fhicd, &iptr, &inleft,
971 &nptr, &outleft, 0) == (size_t)-1 &&
972 errno == E2BIG) {
973 iconv_ft(fhicd, NULL, NULL, NULL, NULL,
975 mime_fromhdr_inc(inleft);
976 goto again;
979 * For state-dependent encodings,
980 * reset the state here, assuming
981 * that states are restricted to
982 * single encoded-word parts.
984 while (iconv_ft(fhicd, NULL, NULL,
985 &nptr, &outleft, 0) == (size_t)-1 &&
986 errno == E2BIG)
987 mime_fromhdr_inc(16);
988 out->l += uptr - mptr - outleft;
989 q += uptr - mptr - outleft;
990 } else {
991 #endif
/* no charset conversion: append the decoded bytes as they are */
992 while (cout.l > maxstor - out->l)
993 mime_fromhdr_inc(cout.l -
994 (maxstor - out->l));
995 memcpy(q, cout.s, cout.l);
996 q += cout.l;
997 out->l += cout.l;
998 #ifdef HAVE_ICONV
1000 #endif
1001 free(cout.s);
1002 lastwordend = q;
1003 lastoutl = out->l;
1004 } else {
/* literal copy of one byte; also the target for malformed encoded words */
1005 notmime:
1006 p = op;
1007 while (out->l >= maxstor)
1008 mime_fromhdr_inc(16);
1009 *q++ = *p;
1010 out->l++;
/* a non-blank literal byte ends the "adjacent encoded words" state */
1011 if (!blankchar(*p&0377))
1012 lastwordend = NULL;
1015 fromhdr_end:
1016 *q = '\0';
1017 if (flags & TD_ISPR) {
1018 struct str new;
1019 makeprint(out, &new);
1020 free(out->s);
1021 *out = new;
1023 if (flags & TD_DELCTRL)
1024 out->l = delctrl(out->s, out->l);
1025 #ifdef HAVE_ICONV
1026 if (fhicd != (iconv_t)-1)
1027 iconv_close(fhicd);
1028 #endif
1029 return;
1033 * Convert header fields to RFC 1522 format and write to the file fo.
/*
 * Writes the in->l bytes at in->s to fo and returns the number of bytes
 * accounted for as written.  If more than half of the bytes need quoting
 * (per mustquote_hdr()) the whole field is emitted as base64 encoded words;
 * otherwise it is processed word by word and only words that need it are
 * emitted as "Q" encoded words.  Lines are folded with "\n " when the
 * column limit (65 on the first line, 76 afterwards) would be exceeded.
 * The charset named in an encoded word is the high-bit charset if the data
 * contains high-bit bytes, else "charset7"/us-ascii.
 */
1035 static size_t
1036 mime_write_tohdr(struct str *in, FILE *fo)
1038 char *upper, *wbeg, *wend, *charset, *lastwordend = NULL, *lastspc, b,
1039 *charset7;
1040 struct str cin, cout;
1041 size_t sz = 0, col = 0, wr, charsetlen, charset7len;
1042 int quoteany, mustquote, broken,
1043 maxcol = 65 /* there is the header field's name, too */;
1045 upper = in->s + in->l;
1046 charset = getcharset(MIME_HIGHBIT);
1047 if ((charset7 = value("charset7")) == NULL)
1048 charset7 = us_ascii;
/* charsetlen ends up as the longer of the two names, for the width estimates */
1049 charsetlen = strlen(charset);
1050 charset7len = strlen(charset7);
1051 charsetlen = smax(charsetlen, charset7len);
/* first pass: b ORs all bytes together (high-bit test), quoteany counts bytes to quote */
1052 b = 0;
1053 for (wbeg = in->s, quoteany = 0; wbeg < upper; wbeg++) {
1054 b |= *wbeg;
1055 if (mustquote_hdr(wbeg, wbeg == in->s, wbeg == &upper[-1]))
1056 quoteany++;
1058 if (2u * quoteany > in->l) {
1060 * Print the entire field in base64.
/* wend is moved back in steps of 4 input bytes until the encoded chunk fits the line */
1062 for (wbeg = in->s; wbeg < upper; wbeg = wend) {
1063 wend = upper;
1064 cin.s = wbeg;
1065 for (;;) {
1066 cin.l = wend - wbeg;
1067 if (cin.l * 4/3 + 7 + charsetlen
1068 < maxcol - col) {
1069 fprintf(fo, "=?%s?B?",
1070 b&0200 ? charset : charset7);
1071 wr = mime_write_tob64(&cin, fo, 1);
1072 fwrite("?=", sizeof (char), 2, fo);
1073 wr += 7 + charsetlen;
1074 sz += wr, col += wr;
1075 if (wend < upper) {
1076 fwrite("\n ", sizeof (char),
1077 2, fo);
1078 sz += 2;
1079 col = 0;
1080 maxcol = 76;
1082 break;
1083 } else {
1084 if (col) {
1085 fprintf(fo, "\n ");
1086 sz += 2;
1087 col = 0;
1088 maxcol = 76;
1089 } else
1090 wend -= 4;
1094 } else {
1096 * Print the field word-wise in quoted-printable.
1098 broken = 0;
1099 for (wbeg = in->s; wbeg < upper; wbeg = wend) {
/* lastspc marks the start of the whitespace run in front of the word; it is written later */
1100 lastspc = NULL;
1101 while (wbeg < upper && whitechar(*wbeg & 0377)) {
1102 lastspc = lastspc ? lastspc : wbeg;
1103 wbeg++;
1104 col++;
1105 broken = 0;
/* only trailing whitespace is left: write it out and finish */
1107 if (wbeg == upper) {
1108 if (lastspc)
1109 while (lastspc < wbeg) {
1110 putc(*lastspc&0377, fo);
1111 lastspc++,
1112 sz++;
1114 break;
/* find the end of the word and count the bytes in it that need quoting */
1116 mustquote = 0;
1117 b = 0;
1118 for (wend = wbeg;
1119 wend < upper && !whitechar(*wend & 0377);
1120 wend++) {
1121 b |= *wend;
/* NOTE(review): the word-end argument tests wbeg rather than wend, unlike the first pass above -- confirm this is intended */
1122 if (mustquote_hdr(wend, wend == wbeg,
1123 wbeg == &upper[-1]))
1124 mustquote++;
1126 if (mustquote || broken ||
1127 ((wend - wbeg) >= 74 && quoteany)) {
1128 for (;;) {
/* if the previous word was encoded too, start at its end so the whitespace in between is encoded with this word */
1129 cin.s = lastwordend ? lastwordend :
1130 wbeg;
1131 cin.l = wend - cin.s;
1132 mime_str_toqp(&cin, &cout,
1133 mustquote_inhdrq, 1);
1134 if ((wr = cout.l + charsetlen + 7)
1135 < maxcol - col) {
1136 if (lastspc)
1137 while (lastspc < wbeg) {
1138 putc(*lastspc
1139 &0377,
1140 fo);
1141 lastspc++,
1142 sz++;
1144 fprintf(fo, "=?%s?Q?", b&0200 ?
1145 charset : charset7);
1146 fwrite(cout.s, sizeof *cout.s,
1147 cout.l, fo);
1148 fwrite("?=", 1, 2, fo);
1149 sz += wr, col += wr;
1150 free(cout.s);
1151 break;
1152 } else {
/* does not fit: fold the line first, or, at column 0, shorten the word by 4 bytes and retry */
1153 broken = 1;
1154 if (col) {
1155 putc('\n', fo);
1156 sz++;
1157 col = 0;
1158 maxcol = 76;
1159 if (lastspc == NULL) {
1160 putc(' ', fo);
1161 sz++;
1162 maxcol--;
1163 } else
1164 maxcol -= wbeg -
1165 lastspc;
1166 } else {
1167 wend -= 4;
1169 free(cout.s);
1172 lastwordend = wend;
1173 } else {
/* plain word: fold before it if it would not fit, then write it unchanged */
1174 if (col &&
1175 (size_t)(wend - wbeg) > maxcol - col) {
1176 putc('\n', fo);
1177 sz++;
1178 col = 0;
1179 maxcol = 76;
1180 if (lastspc == NULL) {
1181 putc(' ', fo);
1182 sz++;
1183 maxcol--;
1184 } else
1185 maxcol -= wbeg - lastspc;
1187 if (lastspc)
1188 while (lastspc < wbeg) {
1189 putc(*lastspc&0377, fo);
1190 lastspc++, sz++;
1192 wr = fwrite(wbeg, sizeof *wbeg,
1193 wend - wbeg, fo);
1194 sz += wr, col += wr;
1195 lastwordend = NULL;
1199 return sz;
1203 * Write len characters of the passed string to the passed file,
1204 * doing charset and header conversion.
/*
 * If the global descriptor iconvd is open (HAVE_ICONV only), str is first
 * converted through iconv_ft() into a scratch buffer; the outcome is then
 * written with mime_write_tohdr().  Returns what mime_write_tohdr() returns,
 * or 0 if the charset conversion fails for a reason other than E2BIG.
 */
1206 static size_t
1207 convhdra(char *str, size_t len, FILE *fp)
1209 #ifdef HAVE_ICONV
1210 char *ip, *op;
1211 size_t isz, osz;
1212 #endif
1213 struct str cin;
1214 size_t cbufsz;
1215 char *cbuf;
1216 size_t sz;
/* the scratch buffer starts at one byte and is enlarged on demand below */
1218 cbuf = ac_alloc(cbufsz = 1);
1219 #ifdef HAVE_ICONV
1220 if (iconvd == (iconv_t)-1) {
1221 #endif
/* no conversion: hand the input through unchanged */
1222 cin.s = str;
1223 cin.l = len;
1224 #ifdef HAVE_ICONV
1225 } else {
/* every attempt converts the complete input again from the start */
1226 again: ip = str;
1227 isz = len;
1228 op = cbuf;
1229 osz = cbufsz;
1230 if (iconv_ft(iconvd, &ip, &isz, &op, &osz, 0) == (size_t)-1) {
1231 if (errno != E2BIG) {
1232 ac_free(cbuf);
1233 return 0;
/* NOTE(review): the previous cbuf is not passed to ac_free() before being replaced; harmless only if ac_alloc() is stack based -- confirm */
1235 cbuf = ac_alloc(cbufsz += isz);
1236 goto again;
1238 cin.s = cbuf;
1239 cin.l = cbufsz - osz;
1241 #endif /* HAVE_ICONV */
1242 sz = mime_write_tohdr(&cin, fp);
1243 ac_free(cbuf);
1244 return sz;
1249 * Write an address to a header field.
1251 static size_t
1252 mime_write_tohdr_a(struct str *in, FILE *f)
1254 char *cp, *lastcp;
1255 size_t sz = 0;
1257 in->s[in->l] = '\0';
1258 lastcp = in->s;
1259 if ((cp = routeaddr(in->s)) != NULL && cp > lastcp) {
1260 sz += convhdra(lastcp, cp - lastcp, f);
1261 lastcp = cp;
1262 } else
1263 cp = in->s;
1264 for ( ; *cp; cp++) {
1265 switch (*cp) {
1266 case '(':
1267 sz += fwrite(lastcp, 1, cp - lastcp + 1, f);
1268 lastcp = ++cp;
1269 cp = skip_comment(cp);
1270 if (--cp > lastcp)
1271 sz += convhdra(lastcp, cp - lastcp, f);
1272 lastcp = cp;
1273 break;
1274 case '"':
1275 while (*cp) {
1276 if (*++cp == '"')
1277 break;
1278 if (*cp == '\\' && cp[1])
1279 cp++;
1281 break;
1284 if (cp > lastcp)
1285 sz += fwrite(lastcp, 1, cp - lastcp, f);
1286 return sz;
/*
 * Append len bytes of str to the growing buffer *buf.  *sz is the
 * allocated size and is raised by len; *pos is the fill level and is
 * advanced by len.  The buffer is resized with srealloc().
 */
static void
addstr(char **buf, size_t *sz, size_t *pos, char *str, size_t len)
{
	char *grown;

	*sz += len;
	grown = srealloc(*buf, *sz);
	*buf = grown;
	memcpy(grown + *pos, str, len);
	*pos += len;
}
1297 static void
1298 addconv(char **buf, size_t *sz, size_t *pos, char *str, size_t len)
1300 struct str in, out;
1302 in.s = str;
1303 in.l = len;
1304 mime_fromhdr(&in, &out, TD_ISPR|TD_ICONV);
1305 addstr(buf, sz, pos, out.s, out.l);
1306 free(out.s);
/*
 * Interpret MIME strings in parts of an address field.
 * Text in front of the position returned by routeaddr() and the content
 * of parenthesized comments are decoded via addconv(); everything else,
 * quoted strings included, is copied unchanged.  Returns name itself if
 * it is NULL or empty, otherwise a savestr() copy of the result.
 *
 * A quoted string without a closing quote used to leave cp on the
 * terminating NUL; the loop increment then moved it behind the string and
 * scanning went on in unrelated memory.  cp is now stepped back so that
 * the increment ends the loop on the terminator.
 */
char *
mime_fromaddr(char *name)
{
	char *cp, *lastcp;
	char *res = NULL;
	size_t ressz = 1, rescur = 0;

	if (name == NULL || *name == '\0')
		return name;
	if ((cp = routeaddr(name)) != NULL && cp > name) {
		addconv(&res, &ressz, &rescur, name, cp - name);
		lastcp = cp;
	} else
		cp = lastcp = name;
	for ( ; *cp; cp++) {
		switch (*cp) {
		case '(':
			/* copy literal text up to and including the '(' */
			addstr(&res, &ressz, &rescur, lastcp, cp - lastcp + 1);
			lastcp = ++cp;
			cp = skip_comment(cp);
			if (--cp > lastcp)
				addconv(&res, &ressz, &rescur, lastcp,
						cp - lastcp);
			lastcp = cp;
			break;
		case '"':
			/* skip the quoted string, honouring backslash escapes */
			while (*cp) {
				if (*++cp == '"')
					break;
				if (*cp == '\\' && cp[1])
					cp++;
			}
			/* unterminated: let the loop increment hit the NUL */
			if (*cp == '\0')
				cp--;
			break;
		}
	}
	if (cp > lastcp)
		addstr(&res, &ressz, &rescur, lastcp, cp - lastcp);
	/* ressz started at 1, so there is always room for the terminator */
	res[rescur] = '\0';
	cp = savestr(res);
	free(res);
	return cp;
}
1356 * fwrite whilst adding prefix, if present.
/*
 * Writes size * nmemb bytes from ptr to f and puts prefix (prefixlen bytes)
 * at the start of every output line.  With the "quote-fold" variable set,
 * lines are additionally folded at that width: a fold is written as
 * backslash-newline, and the run of quote characters found at the start of
 * the line is represented on continuation lines by as many '.' plus one
 * space.  Returns the number of bytes written, prefixes included.
 * The static variables remember the stream and the last byte of the
 * previous call, so a line continued across calls gets no second prefix.
 */
1358 size_t
1359 prefixwrite(void *ptr, size_t size, size_t nmemb, FILE *f,
1360 char *prefix, size_t prefixlen)
1362 static FILE *lastf;
1363 static char lastc = '\n';
1364 size_t lpref, i, qfold = 0, lnlen = 0, rsz = size * nmemb, wsz = 0;
1365 char *p, *maxp, c;
1367 if (rsz == 0)
1368 return 0;
/* without a prefix this is a plain fwrite() */
1370 if (prefixlen == 0)
1371 return fwrite(ptr, 1, rsz, f);
/* the fold width is at least prefixlen + 4; one column is kept for the escape */
1373 if ((p = value("quote-fold")) != NULL) {
1374 qfold = (size_t)strtol(p, NULL, 10);
1375 if (qfold < prefixlen + 4)
1376 qfold = prefixlen + 4;
1377 --qfold; /* The newline escape */
/* start with a prefix if this is another stream or the last call ended a line */
1380 if (f != lastf || lastc == '\n') {
1381 wsz += fwrite(prefix, sizeof *prefix, prefixlen, f);
1382 lnlen = prefixlen;
1384 lastf = f;
1386 p = ptr;
1387 maxp = p + rsz;
/* no folding: copy bytes, writing the prefix after each newline except a final one */
1389 if (! qfold) {
1390 for (;;) {
1391 c = *p++;
1392 putc(c, f);
1393 wsz++;
1394 if (p == maxp)
1395 break;
1396 if (c != '\n')
1397 continue;
1398 wsz += fwrite(prefix, sizeof *prefix, prefixlen, f);
1400 } else {
1401 for (;;) {
1403 * After writing a real newline followed by our prefix,
1404 * compress the quoted prefixes
/* lpref counts the quote characters written; blanks between them are dropped */
1406 for (lpref = 0; p != maxp;) {
1407 /* (c: keep cc happy) */
1408 for (c = i = 0; p + i < maxp;) {
1409 c = p[i++];
1410 if (blankspacechar(c))
1411 continue;
1412 if (! ISQUOTE(c))
1413 goto jquoteok;
1414 break;
1416 p += i;
1417 ++lpref;
1418 putc(c, f);
1419 ++wsz;
1421 jquoteok: lnlen += lpref;
1423 jsoftnl: /*
1424 * Search forward until either *quote-fold* or NL.
1425 * In the former case try to break at whitespace,
1426 * but only if that lies in the 2nd half of the data
/* rsz is reused here as the offset just behind the last whitespace seen */
1428 for (c = rsz = i = 0; p + i < maxp;) {
1429 c = p[i++];
1430 if (c == '\n')
1431 break;
1432 if (spacechar(c))
1433 rsz = i;
1434 if (lnlen + i >= qfold) {
1435 c = 0;
1436 if (rsz > qfold >> 1)
1437 i = rsz;
1438 break;
1442 if (i > 0) {
1443 wsz += fwrite(p, sizeof *p, i, f);
1444 p += i;
1446 if (p >= maxp)
1447 break;
/* the line was cut by the fold limit, not by a newline: emit an escaped newline */
1449 if (c != '\n') {
1450 putc('\\', f);
1451 putc('\n', f);
1452 wsz += 2;
1455 wsz += fwrite(prefix, sizeof *prefix, prefixlen, f);
1456 lnlen = prefixlen;
1457 if (c == '\n')
1458 continue;
/* continuation line: stand in for the quote run with dots and one space */
1460 if ((i = lpref)) {
1461 for (; i > 0; ++wsz, ++lnlen, --i)
1462 (void)putc('.', f);
1463 (void)putc(' ', f);
1464 ++wsz;
1465 ++lnlen;
1467 goto jsoftnl;
/* remember the last input byte for the next call */
1471 lastc = p[-1];
1472 return (wsz);
1476 * fwrite while checking for displayability.
1478 static size_t
1479 fwrite_td(void *ptr, size_t size, size_t nmemb, FILE *f, enum tdflags flags,
1480 char *prefix, size_t prefixlen)
1482 char *upper;
1483 size_t sz, csize;
1484 #ifdef HAVE_ICONV
1485 char *iptr, *nptr;
1486 size_t inleft, outleft;
1487 #endif
1488 char *mptr, *xmptr, *mlptr = NULL;
1489 size_t mptrsz;
1491 csize = size * nmemb;
1492 mptrsz = csize;
1493 mptr = xmptr = ac_alloc(mptrsz + 1);
1494 #ifdef HAVE_ICONV
1495 if ((flags & TD_ICONV) && iconvd != (iconv_t)-1) {
1496 again: inleft = csize;
1497 outleft = mptrsz;
1498 nptr = mptr;
1499 iptr = ptr;
1500 if (iconv_ft(iconvd, &iptr, &inleft, &nptr, &outleft, 0) ==
1501 (size_t)-1 &&
1502 errno == E2BIG) {
1503 iconv_ft(iconvd, NULL, NULL, NULL, NULL, 0);
1504 ac_free(mptr);
1505 mptrsz += inleft;
1506 mptr = ac_alloc(mptrsz + 1);
1507 goto again;
1509 nmemb = mptrsz - outleft;
1510 size = sizeof (char);
1511 ptr = mptr;
1512 csize = size * nmemb;
1513 } else
1514 #endif
1516 memcpy(mptr, ptr, csize);
1518 upper = mptr + csize;
1519 *upper = '\0';
1520 if (flags & TD_ISPR) {
1521 struct str in, out;
1522 in.s = mptr;
1523 in.l = csize;
1524 makeprint(&in, &out);
1525 mptr = mlptr = out.s;
1526 csize = out.l;
1528 if (flags & TD_DELCTRL)
1529 csize = delctrl(mptr, csize);
1530 sz = prefixwrite(mptr, sizeof *mptr, csize, f, prefix, prefixlen);
1531 ac_free(xmptr);
1532 free(mlptr);
1533 return sz;
1537 * fwrite performing the given MIME conversion.
1539 size_t
1540 mime_write(void *ptr, size_t size, FILE *f,
1541 enum conversion convert, enum tdflags dflags,
1542 char *prefix, size_t prefixlen,
1543 char **restp, size_t *restsizep)
1545 struct str in, out;
1546 size_t sz, csize;
1547 int is_text = 0;
1548 #ifdef HAVE_ICONV
1549 char mptr[LINESIZE * 6];
1550 char *iptr, *nptr;
1551 size_t inleft, outleft;
1552 #endif
1554 if (size == 0)
1555 return 0;
1556 csize = size;
1557 #ifdef HAVE_ICONV
1558 if (csize < sizeof mptr && (dflags & TD_ICONV)
1559 && iconvd != (iconv_t)-1
1560 && (convert == CONV_TOQP || convert == CONV_8BIT ||
1561 convert == CONV_TOB64 ||
1562 convert == CONV_TOHDR)) {
1563 inleft = csize;
1564 outleft = sizeof mptr;
1565 nptr = mptr;
1566 iptr = ptr;
1567 if (iconv_ft(iconvd, &iptr, &inleft,
1568 &nptr, &outleft, 0) != (size_t)-1) {
1569 in.l = sizeof mptr - outleft;
1570 in.s = mptr;
1571 } else {
1572 if (errno == EILSEQ || errno == EINVAL)
1573 invalid_seq(*iptr);
1574 return 0;
1576 } else {
1577 #endif
1578 in.s = ptr;
1579 in.l = csize;
1580 #ifdef HAVE_ICONV
1582 #endif
1583 switch (convert) {
1584 case CONV_FROMQP:
1585 mime_fromqp(&in, &out, 0);
1586 sz = fwrite_td(out.s, sizeof *out.s, out.l, f, dflags,
1587 prefix, prefixlen);
1588 free(out.s);
1589 break;
1590 case CONV_TOQP:
1591 sz = mime_write_toqp(&in, f, mustquote_body);
1592 break;
1593 case CONV_8BIT:
1594 sz = prefixwrite(in.s, sizeof *in.s, in.l, f,
1595 prefix, prefixlen);
1596 break;
1597 case CONV_FROMB64_T:
1598 is_text = 1;
1599 /*FALLTHROUGH*/
1600 case CONV_FROMB64:
1601 mime_fromb64_b(&in, &out, is_text, f);
1602 if (is_text && out.s[out.l-1] != '\n' && restp && restsizep) {
1603 *restp = ptr;
1604 *restsizep = size;
1605 sz = 0;
1606 } else {
1607 sz = fwrite_td(out.s, sizeof *out.s, out.l, f, dflags,
1608 prefix, prefixlen);
1610 free(out.s);
1611 break;
1612 case CONV_TOB64:
1613 sz = mime_write_tob64(&in, f, 0);
1614 break;
1615 case CONV_FROMHDR:
1616 mime_fromhdr(&in, &out, TD_ISPR|TD_ICONV);
1617 sz = fwrite_td(out.s, sizeof *out.s, out.l, f,
1618 dflags&TD_DELCTRL, prefix, prefixlen);
1619 free(out.s);
1620 break;
1621 case CONV_TOHDR:
1622 sz = mime_write_tohdr(&in, f);
1623 break;
1624 case CONV_TOHDR_A:
1625 sz = mime_write_tohdr_a(&in, f);
1626 break;
1627 default:
1628 sz = fwrite_td(in.s, sizeof *in.s, in.l, f, dflags,
1629 prefix, prefixlen);
1631 return sz;