regex: updates from neatvi
[neatmail.git] / mime.c
blob9a5fa6c125d3510c5519c25391d342d431906424
1 #include <ctype.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include "mail.h"
/* the base64 alphabet; the index of a character is its 6-bit value */
static char *b64_ch =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
/* reverse of b64_ch[], indexed by byte value; filled in by b64_init() */
static int b64_val[256];
11 static void b64_init(void)
13 int i;
14 for (i = 0; i < 64; i++)
15 b64_val[(unsigned char) b64_ch[i]] = i;
18 static int b64_dec(char *d, char *s)
20 unsigned v = 0;
21 v |= b64_val[(unsigned char) s[0]];
22 v <<= 6;
23 v |= b64_val[(unsigned char) s[1]];
24 v <<= 6;
25 v |= b64_val[(unsigned char) s[2]];
26 v <<= 6;
27 v |= b64_val[(unsigned char) s[3]];
29 d[2] = v & 0xff;
30 v >>= 8;
31 d[1] = v & 0xff;
32 v >>= 8;
33 d[0] = v & 0xff;
34 return 3 - (s[1] == '=') - (s[2] == '=') - (s[3] == '=');
37 char *base64(char *s, int len)
39 struct sbuf *sb = sbuf_make();
40 int i;
41 for (i = 0; i < len; i += 3) {
42 unsigned c0 = (unsigned char) s[i];
43 unsigned c1 = i + 1 < len ? (unsigned char) s[i + 1] : 0;
44 unsigned c2 = i + 2 < len ? (unsigned char) s[i + 2] : 0;
45 unsigned v = (c0 << 16) | (c1 << 8) | c2;
46 if (i > 0 && i % (19 * 3) == 0)
47 sbuf_printf(sb, "\n");
48 sbuf_printf(sb, "%c", b64_ch[(v >> 18) & 0x3f]);
49 sbuf_printf(sb, "%c", b64_ch[(v >> 12) & 0x3f]);
50 sbuf_printf(sb, "%c", i + 1 < len ? b64_ch[(v >> 6) & 0x3f] : '=');
51 sbuf_printf(sb, "%c", i + 2 < len ? b64_ch[v & 0x3f] : '=');
53 sbuf_printf(sb, "\n");
54 return sbuf_done(sb);
57 static void dec_b64(struct sbuf *sb, char *s, char *e)
59 if (!b64_val['B'])
60 b64_init();
61 while (s + 4 <= e) {
62 while (s < e && isspace((unsigned char) *s))
63 s++;
64 if (s < e) {
65 char dst[4];
66 int n = b64_dec(dst, s);
67 s += 4;
68 sbuf_mem(sb, dst, n);
/* the value of hexadecimal digit c (either case); 0 if c is not a hex digit */
static int hexval(int c)
{
	int v = 0;
	if (c >= 'a' && c <= 'f')
		v = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		v = c - 'A' + 10;
	else if (c >= '0' && c <= '9')
		v = c - '0';
	return v;
}
/*
 * Decode the quoted-printable text in [s, e) and append the result to sb.
 *
 * "=" directly followed by a line break (LF or CRLF) is a soft line break
 * and produces no output.  "=XY" with at least one more byte after it in
 * the input becomes the byte with hexadecimal value XY (invalid digits
 * count as 0).  When hdr is nonzero, '_' is decoded as a space.  Every
 * other byte is copied unchanged.  No byte at or after e is examined.
 */
static void dec_qp(struct sbuf *sb, char *s, char *e, int hdr)
{
	while (s < e) {
		long left = e - s;
		if (*s == '=' && left >= 2 && s[1] == '\n') {
			s += 2;
		} else if (*s == '=' && left >= 3 && s[1] == '\r' && s[2] == '\n') {
			/* soft line break in a message with CRLF line endings */
			s += 3;
		} else if (*s == '=' && s + 2 < e) {
			sbuf_chr(sb, (hexval(s[1]) << 4) | hexval(s[2]));
			s += 3;
		} else {
			sbuf_chr(sb, hdr && *s == '_' ? ' ' : (unsigned char) *s);
			s++;
		}
	}
}
static void toutf8(char *charset, struct sbuf *dst, char *src, int len);

/*
 * Append the header text in [hdr, end) to sb, decoding encoded words of
 * the form "=?charset?x?text?=".
 *
 * When x is 'b' or 'B' the text is decoded with dec_b64(), otherwise with
 * dec_qp() in header mode; the decoded bytes are then passed through
 * toutf8() with the given charset (truncated to 127 characters).  White
 * space following an encoded word is dropped, except for the last byte of
 * the input.  Everything else is copied byte by byte.  All look-ahead is
 * limited to bytes before end.
 */
static void msg_hdrdec2(struct sbuf *sb, char *hdr, char *end)
{
	while (hdr < end) {
		/* q1..q4 are the four '?' of an encoded word, if one starts here */
		char *q1 = end - hdr >= 2 && hdr[0] == '=' && hdr[1] == '?' ? hdr + 1 : NULL;
		char *q2 = q1 ? memchr(q1 + 1, '?', end - (q1 + 1)) : NULL;
		char *q3 = q2 ? memchr(q2 + 1, '?', end - (q2 + 1)) : NULL;
		char *q4 = q3 ? memchr(q3 + 1, '?', end - (q3 + 1)) : NULL;
		if (q4 && q4 + 1 < end && q4[1] == '=') {
			int c = tolower((unsigned char) q2[1]);
			struct sbuf *sb2 = sbuf_make();
			char charset[128];
			int charset_len = q2 - q1 - 1;
			if (charset_len > (int) sizeof(charset) - 1)
				charset_len = sizeof(charset) - 1;
			memcpy(charset, q1 + 1, charset_len);
			charset[charset_len] = '\0';
			if (c == 'b')
				dec_b64(sb2, q3 + 1, q4);
			else
				dec_qp(sb2, q3 + 1, q4, 1);
			hdr = q4 + 2;
			/* test the bound first: hdr may already be equal to end */
			while (hdr + 1 < end && isspace((unsigned char) *hdr))
				hdr++;
			toutf8(charset, sb, sbuf_buf(sb2), sbuf_len(sb2));
			sbuf_free(sb2);
		} else {
			sbuf_chr(sb, (unsigned char) *hdr++);
		}
	}
}
/*
 * Decode the NUL-terminated header string hdr with msg_hdrdec2() into a
 * fresh sbuf and return the result of sbuf_done() on it.
 */
char *msg_hdrdec(char *hdr)
{
	struct sbuf *out = sbuf_make();
	msg_hdrdec2(out, hdr, hdr + strlen(hdr));
	return sbuf_done(out);
}
/* decoding mime messages */

/* dimensions of the arrays in struct mime */
#define MAXPARTS	(1 << 3)	/* entries in part[] and bound[] */
#define BOUNDLEN	(1 << 7)	/* bytes in each bound[] entry */

/* flags collected by read_hdrs(); the TYPE_ and ENC_ bits share one int */
#define TYPE_TXT	0x01		/* Content-Type starts with "text" */
#define TYPE_MPART	0x02		/* Content-Type starts with "multipart" */
#define TYPE_ETC	0x04
#define ENC_B8		0x10
#define ENC_QP		0x20		/* Content-Transfer-Encoding: quoted-printable */
#define ENC_B64		0x40		/* Content-Transfer-Encoding: base64 */
152 struct mime {
153 int depth;
154 int part[MAXPARTS];
155 char bound[MAXPARTS][BOUNDLEN];
156 char *src;
157 char *end;
160 static void copy_till(struct mime *m, struct sbuf *dst, char *s)
162 int len = s - m->src;
163 sbuf_mem(dst, m->src, len);
164 m->src += len;
167 static void read_boundary(struct mime *m, char *s, char *hdrend)
169 char *bound = m->bound[m->depth];
170 char *e;
171 s = memchr(s, '=', hdrend - s);
172 if (!s)
173 return;
174 s++;
175 if (*s == '"') {
176 s++;
177 e = memchr(s, '"', hdrend - s);
178 } else {
179 e = s;
180 while (e < hdrend && !isspace(*e) && *e != ';')
181 e++;
183 if (!e)
184 return;
185 bound[0] = '-';
186 bound[1] = '-';
187 memcpy(bound + 2, s, e - s);
188 bound[e - s + 2] = '\0';
189 m->depth++;
/*
 * Identify the charset parameter in [s, hdrend).
 *
 * Only the double-quoted names "windows-1256" and "iso-8859-6", spelled
 * exactly like that right after the first '=', are recognized; the
 * matching constant string is returned.  NULL is returned for anything
 * else.  m is not used.
 */
static char *read_charset(struct mime *m, char *s, char *hdrend)
{
	static struct {
		char *pat;	/* '=' followed by the quoted name */
		char *name;	/* the value to return */
	} known[] = {
		{"=\"windows-1256\"", "windows-1256"},
		{"=\"iso-8859-6\"", "iso-8859-6"},
	};
	char *eq = memchr(s, '=', hdrend - s);
	size_t i;
	if (eq == NULL)
		return NULL;
	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		/* the whole pattern must lie before hdrend */
		if (eq + strlen(known[i].pat) <= hdrend && startswith(eq, known[i].pat))
			return known[i].name;
	}
	return NULL;
}
/*
 * Find the header parameter that follows the one at s, within [s, e).
 *
 * The text is scanned for the next ';' outside double quotes.  The
 * position after that ';' is returned if at least two more bytes follow
 * the ';'.  NULL is returned otherwise, including when s is NULL, when no
 * ';' is found, or when a quoted string is not terminated before e.
 */
static char *hdr_nextfield(char *s, char *e)
{
	while (s != NULL && s < e && *s != ';') {
		char ch = *s;
		s++;
		if (ch != '"')
			continue;
		/* jump over the quoted string; s becomes NULL if it is unterminated */
		s = memchr(s, '"', e - s);
		if (s != NULL)
			s++;
	}
	if (s == NULL || s + 2 >= e)
		return NULL;
	return s + 1;
}
212 static void read_hdrs(struct mime *m, struct sbuf *dst, int *type, char **charset)
214 char *s = m->src;
215 char *e = m->end;
216 *type = 0;
217 *charset = NULL;
218 while (s && s < e && *s != '\n') {
219 char *n = memchr(s, '\n', e - s);
220 while (n && n + 1 < e && n[1] != '\n' && isspace(n[1]))
221 n = memchr(n + 1, '\n', e - n - 1);
222 if (!n++)
223 break;
224 if (startswith(s, "Content-Type:")) {
225 char *key = strchr(s, ':') + 1;
226 char *hdrend = s + hdrlen(s, e - s);
227 while (key) {
228 while (key < hdrend && isspace((unsigned char) *key))
229 key++;
230 if (startswith(key, "text"))
231 *type |= TYPE_TXT;
232 if (startswith(key, "multipart"))
233 *type |= TYPE_MPART;
234 if (startswith(key, "charset"))
235 *charset = read_charset(m, key, hdrend);
236 if (startswith(key, "boundary"))
237 read_boundary(m, key, hdrend);
238 key = hdr_nextfield(key, hdrend);
241 if (startswith(s, "Content-Transfer-Encoding:")) {
242 char *key = strchr(s, ':') + 1;
243 char *hdrend = s + hdrlen(s, e - s);
244 while (key) {
245 while (key < hdrend && isspace(*key))
246 key++;
247 if (startswith(key, "quoted-printable"))
248 *type |= ENC_QP;
249 if (startswith(key, "base64"))
250 *type |= ENC_B64;
251 key = hdr_nextfield(key, hdrend);
254 msg_hdrdec2(dst, s, n);
255 s = n;
257 sbuf_chr(dst, '\n');
258 m->src = s + 1;
261 static int is_bound(struct mime *m, char *s)
263 return startswith(s, m->bound[m->depth - 1]);
266 static void read_bound(struct mime *m, struct sbuf *dst)
268 char *s = m->src;
269 int len = strlen(m->bound[m->depth - 1]);
270 if (s[len] == '-' && s[len + 1] == '-')
271 m->depth--;
272 s = memchr(s, '\n', m->end - s);
273 s = s ? s + 1 : m->end;
274 copy_till(m, dst, s);
277 static char *find_bound(struct mime *m)
279 char *s = m->src;
280 char *e = m->end;
281 while (s < e) {
282 if (is_bound(m, s))
283 return s;
284 if (!(s = memchr(s, '\n', e - s)))
285 break;
286 s++;
288 return e;
291 static void read_body(struct mime *m, struct sbuf *dst, int type)
293 char *end = m->depth ? find_bound(m) : m->end;
294 if (~type & TYPE_TXT) {
295 copy_till(m, dst, end);
296 return;
298 if (type & ENC_QP) {
299 dec_qp(dst, m->src, end, 0);
300 m->src = end;
301 return;
303 if (type & ENC_B64) {
304 dec_b64(dst, m->src, end);
305 sbuf_chr(dst, '\n');
306 m->src = end;
307 return;
309 copy_till(m, dst, end);
312 int msg_demime(char *msg, long msglen, char **mod, long *modlen)
314 struct sbuf *dst = sbuf_make();
315 struct mime m;
316 m.src = msg;
317 m.end = msg + msglen;
318 m.depth = 0;
319 while ((m.depth && m.src < m.end) || m.src == msg) {
320 int type;
321 char *charset;
322 struct sbuf *sb = sbuf_make();
323 read_hdrs(&m, sb, &type, &charset);
324 read_body(&m, sb, type);
325 toutf8(charset, dst, sbuf_buf(sb), sbuf_len(sb));
326 sbuf_free(sb);
327 if (m.depth)
328 read_bound(&m, dst);
330 sbuf_chr(dst, '\0');
331 *modlen = sbuf_len(dst) - 1;
332 *mod = sbuf_done(dst);
333 return 0;
/*
 * windows-1256 to UTF-8: the UTF-8 string for each byte value.
 *
 * Bytes without an entry (NULL) are copied unchanged by toutf8(), which is
 * only right for ASCII; every byte from 0x80 to 0xFF therefore has an
 * entry.  The bytes of the 0xA0-0xBE range that windows-1256 shares with
 * Latin-1 map to U+00A0-U+00BE, whose UTF-8 form is 0xC2 followed by the
 * byte itself.  They, and the invisible formatting characters, are written
 * as hex escapes.
 */
static char *cp1256[256] = {
	[0x80] = "€", [0x81] = "پ", [0x82] = "‚", [0x83] = "ƒ",
	[0x84] = "„", [0x85] = "…", [0x86] = "†", [0x87] = "‡",
	[0x88] = "ˆ", [0x89] = "‰", [0x8A] = "ٹ", [0x8B] = "‹",
	[0x8C] = "Œ", [0x8D] = "چ", [0x8E] = "ژ", [0x8F] = "ڈ",
	[0x90] = "گ", [0x91] = "‘", [0x92] = "’", [0x93] = "“",
	[0x94] = "”", [0x95] = "•", [0x96] = "–", [0x97] = "—",
	[0x98] = "ک", [0x99] = "™", [0x9A] = "ڑ", [0x9B] = "›",
	[0x9C] = "œ",
	[0x9D] = "\xe2\x80\x8c",	/* U+200C zero width non-joiner */
	[0x9E] = "\xe2\x80\x8d",	/* U+200D zero width joiner */
	[0x9F] = "ں",
	[0xA0] = "\xc2\xa0",		/* U+00A0 no-break space */
	[0xA1] = "،",
	[0xA2] = "\xc2\xa2", [0xA3] = "\xc2\xa3", [0xA4] = "\xc2\xa4", [0xA5] = "\xc2\xa5",
	[0xA6] = "\xc2\xa6", [0xA7] = "\xc2\xa7", [0xA8] = "\xc2\xa8", [0xA9] = "\xc2\xa9",
	[0xAA] = "ھ",
	[0xAB] = "\xc2\xab", [0xAC] = "\xc2\xac", [0xAD] = "\xc2\xad", [0xAE] = "\xc2\xae",
	[0xAF] = "\xc2\xaf", [0xB0] = "\xc2\xb0", [0xB1] = "\xc2\xb1", [0xB2] = "\xc2\xb2",
	[0xB3] = "\xc2\xb3", [0xB4] = "\xc2\xb4", [0xB5] = "\xc2\xb5", [0xB6] = "\xc2\xb6",
	[0xB7] = "\xc2\xb7", [0xB8] = "\xc2\xb8", [0xB9] = "\xc2\xb9",
	[0xBA] = "؛",
	[0xBB] = "\xc2\xbb", [0xBC] = "\xc2\xbc", [0xBD] = "\xc2\xbd", [0xBE] = "\xc2\xbe",
	[0xBF] = "؟",
	[0xC0] = "ہ", [0xC1] = "ء", [0xC2] = "آ", [0xC3] = "أ",
	[0xC4] = "ؤ", [0xC5] = "إ", [0xC6] = "ئ", [0xC7] = "ا",
	[0xC8] = "ب", [0xC9] = "ة", [0xCA] = "ت", [0xCB] = "ث",
	[0xCC] = "ج", [0xCD] = "ح", [0xCE] = "خ", [0xCF] = "د",
	[0xD0] = "ذ", [0xD1] = "ر", [0xD2] = "ز", [0xD3] = "س",
	[0xD4] = "ش", [0xD5] = "ص", [0xD6] = "ض", [0xD7] = "×",
	[0xD8] = "ط", [0xD9] = "ظ", [0xDA] = "ع", [0xDB] = "غ",
	[0xDC] = "ـ", [0xDD] = "ف", [0xDE] = "ق", [0xDF] = "ك",
	[0xE0] = "à", [0xE1] = "ل", [0xE2] = "â", [0xE3] = "م",
	[0xE4] = "ن", [0xE5] = "ه", [0xE6] = "و", [0xE7] = "ç",
	[0xE8] = "è", [0xE9] = "é", [0xEA] = "ê", [0xEB] = "ë",
	[0xEC] = "ى", [0xED] = "ي", [0xEE] = "î", [0xEF] = "ï",
	[0xF0] = "ً", [0xF1] = "ٌ", [0xF2] = "ٍ", [0xF3] = "َ",
	[0xF4] = "ô", [0xF5] = "ُ", [0xF6] = "ِ", [0xF7] = "÷",
	[0xF8] = "ّ", [0xF9] = "ù", [0xFA] = "ْ", [0xFB] = "û",
	[0xFC] = "ü",
	[0xFD] = "\xe2\x80\x8e",	/* U+200E left-to-right mark */
	[0xFE] = "\xe2\x80\x8f",	/* U+200F right-to-left mark */
	[0xFF] = "ے",
};
/*
 * iso-8859-6 to UTF-8: the UTF-8 string for each byte value.
 *
 * Bytes without an entry (NULL) are copied unchanged by toutf8().  Besides
 * the Arabic letters, the three non-Arabic characters of the upper half
 * (0xA0, 0xA4 and 0xAD, equal to U+00A0, U+00A4 and U+00AD) have entries,
 * so that they are not emitted as lone bytes, which would be invalid
 * UTF-8.
 */
static char *iso8859_6[256] = {
	[0xA0] = "\xc2\xa0",		/* U+00A0 no-break space */
	[0xA4] = "\xc2\xa4",		/* U+00A4 currency sign */
	[0xAC] = "،",
	[0xAD] = "\xc2\xad",		/* U+00AD soft hyphen */
	[0xBB] = "؛", [0xBF] = "؟", [0xC1] = "ء",
	[0xC2] = "آ", [0xC3] = "أ", [0xC4] = "ؤ", [0xC5] = "إ",
	[0xC6] = "ئ", [0xC7] = "ا", [0xC8] = "ب", [0xC9] = "ة",
	[0xCA] = "ت", [0xCB] = "ث", [0xCC] = "ج", [0xCD] = "ح",
	[0xCE] = "خ", [0xCF] = "د", [0xD0] = "ذ", [0xD1] = "ر",
	[0xD2] = "ز", [0xD3] = "س", [0xD4] = "ش", [0xD5] = "ص",
	[0xD6] = "ض", [0xD7] = "ط", [0xD8] = "ظ", [0xD9] = "ع",
	[0xDA] = "غ", [0xE0] = "ـ", [0xE1] = "ف", [0xE2] = "ق",
	[0xE3] = "ك", [0xE4] = "ل", [0xE5] = "م", [0xE6] = "ن",
	[0xE7] = "ه", [0xE8] = "و", [0xE9] = "ى", [0xEA] = "ي",
	[0xEB] = "ً", [0xEC] = "ٌ", [0xED] = "ٍ", [0xEE] = "َ",
	[0xEF] = "ُ", [0xF0] = "ِ", [0xF1] = "ّ", [0xF2] = "ْ",
};
379 static void toutf8(char *charset, struct sbuf *dst, char *src, int len)
381 int i;
382 char **map = NULL;
383 if (charset && !strcmp("windows-1256", charset))
384 map = cp1256;
385 if (charset && !strcmp("iso-8859-6", charset))
386 map = iso8859_6;
387 if (map) {
388 for (i = 0; i < len; i++) {
389 int c = (unsigned char) src[i];
390 if (map[c])
391 sbuf_str(dst, map[c]);
392 else
393 sbuf_chr(dst, c);
395 } else {
396 sbuf_mem(dst, src, len);