8 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Reverse lookup table indexed by byte value.  b64_init() stores, for each
 * character of b64_ch[], its position (0..63).  Entries that b64_init()
 * never writes keep the zero value of static storage.
 */
9 static int b64_val
[256];
/*
 * Fill b64_val[]: for each of the 64 characters in b64_ch[], record its
 * index so that a base64 character can be mapped back to its 6-bit value.
 * The character is cast to unsigned char before being used as an index.
 */
11 static void b64_init(void)
14 for (i
= 0; i
< 64; i
++)
15 b64_val
[(unsigned char) b64_ch
[i
]] = i
;
/*
 * Decode one group of four base64 characters starting at s.
 *
 * The visible statements OR the b64_val[] entry of s[0], s[1], s[2] and
 * s[3] into v, one character at a time.  The return value is 3 minus one
 * for each of s[1], s[2], s[3] that equals '=', i.e. the byte count implied
 * by the padding.
 *
 * NOTE(review): the shifts applied to v and the stores into d are not
 * visible in this listing; presumably d receives up to three bytes, confirm.
 * NOTE(review): s[0]..s[3] are read unconditionally, so the caller has to
 * guarantee four readable bytes at s.
 */
18 static int b64_dec(char *d
, char *s
)
21 v
|= b64_val
[(unsigned char) s
[0]];
23 v
|= b64_val
[(unsigned char) s
[1]];
25 v
|= b64_val
[(unsigned char) s
[2]];
27 v
|= b64_val
[(unsigned char) s
[3]];
/* one output byte fewer for every '=' among the last three characters */
34 return 3 - (s
[1] == '=') - (s
[2] == '=') - (s
[3] == '=');
/*
 * Encode len bytes of s as base64 text, accumulated in a new sbuf.
 *
 * The input is consumed three bytes per iteration.  Bytes missing from the
 * last group are taken as 0 for the bit packing, and the output positions
 * that depend only on missing bytes are written as '='.  A "\n" is printed
 * before every group that starts at a positive multiple of 19 * 3 input
 * bytes (19 groups, i.e. 76 output characters), and one more "\n" is
 * printed by the last visible statement.
 *
 * NOTE(review): the statement that returns the result is not visible here;
 * confirm how the sbuf is converted to the returned char * and who frees it.
 */
37 char *base64(char *s
, int len
)
39 struct sbuf
*sb
= sbuf_make();
41 for (i
= 0; i
< len
; i
+= 3) {
42 unsigned c0
= (unsigned char) s
[i
];
/* bytes past the end of the input are replaced by 0 */
43 unsigned c1
= i
+ 1 < len
? (unsigned char) s
[i
+ 1] : 0;
44 unsigned c2
= i
+ 2 < len
? (unsigned char) s
[i
+ 2] : 0;
/* pack the three bytes into a 24-bit value, first byte highest */
45 unsigned v
= (c0
<< 16) | (c1
<< 8) | c2
;
/* line break every 19 groups, but not before the first group */
46 if (i
> 0 && i
% (19 * 3) == 0)
47 sbuf_printf(sb
, "\n");
/* emit the four 6-bit fields, highest first */
48 sbuf_printf(sb
, "%c", b64_ch
[(v
>> 18) & 0x3f]);
49 sbuf_printf(sb
, "%c", b64_ch
[(v
>> 12) & 0x3f]);
/* third and fourth characters become '=' when their input byte is absent */
50 sbuf_printf(sb
, "%c", i
+ 1 < len
? b64_ch
[(v
>> 6) & 0x3f] : '=');
51 sbuf_printf(sb
, "%c", i
+ 2 < len
? b64_ch
[v
& 0x3f] : '=');
53 sbuf_printf(sb
, "\n");
/*
 * Decode base64 text between s and e, with output going to sb.
 *
 * Visible behaviour: whitespace at s is tested with isspace() on an
 * unsigned char while s < e, and b64_dec(dst, s) decodes one group and
 * yields its byte count n.
 *
 * NOTE(review): the enclosing loop, the declaration of dst and the code
 * that appends the n decoded bytes to sb are not visible in this listing.
 * NOTE(review): b64_dec() reads four bytes at s; confirm that the lines not
 * shown check that four bytes remain before e.
 */
57 static void dec_b64(struct sbuf
*sb
, char *s
, char *e
)
62 while (s
< e
&& isspace((unsigned char) *s
))
66 int n
= b64_dec(dst
, s
);
/*
 * Classify c as a hexadecimal digit using three range tests:
 * '0'..'9', 'A'..'F' and 'a'..'f'.
 *
 * NOTE(review): the values returned by each branch, and the result for a
 * character outside all three ranges, are not visible here.  dec_qp()
 * combines two results as (hi << 4) | lo, so presumably the result is the
 * digit value 0..15; confirm.
 */
73 static int hexval(int c
)
75 if (c
>= '0' && c
<= '9')
77 if (c
>= 'A' && c
<= 'F')
79 if (c
>= 'a' && c
<= 'f')
/*
 * Decode quoted-printable text from s towards e, appending to sb.
 *
 * Three cases are visible:
 *   - '=' immediately followed by '\n' (the quoted-printable soft line
 *     break); what this branch does is not visible in this listing;
 *   - '=' with s + 2 < e: the two following characters are converted with
 *     hexval() and combined into one byte;
 *   - anything else is appended as is, except that '_' is replaced by ' '
 *     when hdr is nonzero.
 *
 * NOTE(review): the first test reads s[1] without comparing s + 1 with e;
 * confirm a readable byte always follows s.
 * NOTE(review): the loop and the advancing of s are not visible here.
 */
84 static void dec_qp(struct sbuf
*sb
, char *s
, char *e
, int hdr
)
87 if (*s
== '=' && s
[1] == '\n') {
89 } else if (*s
== '=' && s
+ 2 < e
) {
/* high nibble from s[1], low nibble from s[2] */
90 sbuf_chr(sb
, (hexval(s
[1]) << 4) | hexval(s
[2]));
/* parsed as (hdr && *s == '_') ? ' ' : byte */
93 sbuf_chr(sb
, hdr
&& *s
== '_' ? ' ' : (unsigned char) *s
);
/* forward declaration: msg_hdrdec2() calls toutf8() before its definition */
99 static void toutf8(char *charset
, struct sbuf
*dst
, char *src
, int len
);
/*
 * Append header text from hdr up to end to sb, decoding words of the form
 * =?charset?X?text?= on the way.
 *
 * q1..q4 locate the four '?' separators of such a word; the word is only
 * accepted when all four are found and q4 is followed by '='.  For an
 * accepted word the charset name between q1 and q2 is copied into the local
 * charset buffer, the text between q3 and q4 is decoded into a temporary
 * sbuf with dec_b64() or dec_qp() (hdr flag set to 1), and the decoded
 * bytes are passed through toutf8() into sb.  Bytes that are not part of an
 * accepted word are appended one at a time with sbuf_chr().
 *
 * NOTE(review): which of dec_b64()/dec_qp() runs is decided on lines not
 * visible here, presumably from c, the lower-cased character after q2.
 * NOTE(review): each memchr() starts at qN + 1 but is given end - qN bytes,
 * so the searched range extends one byte past end; confirm this is intended.
 * NOTE(review): q4[1] is read without checking q4 + 1 against end.
 * NOTE(review): the outer loop, the declaration of charset[], the update of
 * hdr after a decoded word and the release of sb2 are not visible.
 */
101 static void msg_hdrdec2(struct sbuf
*sb
, char *hdr
, char *end
)
/* q1: the '?' right after a leading '=', or NULL */
104 char *q1
= hdr
[0] == '=' && hdr
[1] == '?' ? hdr
+ 1 : NULL
;
/* q2, q3, q4: the following '?' separators, NULL once one is missing */
105 char *q2
= q1
? memchr(q1
+ 1, '?', end
- q1
) : NULL
;
106 char *q3
= q2
? memchr(q2
+ 1, '?', end
- q2
) : NULL
;
107 char *q4
= q3
? memchr(q3
+ 1, '?', end
- q3
) : NULL
;
108 if (q1
&& q2
&& q3
&& q4
&& q4
[1] == '=') {
/* encoding letter, case-insensitive */
109 int c
= tolower((unsigned char) q2
[1]);
110 struct sbuf
*sb2
= sbuf_make();
/* number of bytes strictly between q1 and q2 */
112 int charset_len
= q2
- q1
- 1;
/* clamp so that the name and its terminating NUL fit in charset[] */
113 if (charset_len
> sizeof(charset
) - 1)
114 charset_len
= sizeof(charset
) - 1;
115 memcpy(charset
, q1
+ 1, charset_len
);
116 charset
[charset_len
] = '\0';
118 dec_b64(sb2
, q3
+ 1, q4
);
120 dec_qp(sb2
, q3
+ 1, q4
, 1);
122 while (isspace((unsigned char) *hdr
) && hdr
+ 1 < end
)
/* convert the decoded bytes from the named charset and append to sb */
124 toutf8(charset
, sb
, sbuf_buf(sb2
), sbuf_len(sb2
));
/* not an encoded word: copy one byte and advance */
127 sbuf_chr(sb
, (unsigned char) *hdr
++);
/*
 * Decode a NUL-terminated header string: msg_hdrdec2() is run over hdr up
 * to its terminating NUL (found with strchr(hdr, '\0')) and the result of
 * sbuf_done(sb) is returned.
 *
 * NOTE(review): the creation of sb is not visible here; presumably the
 * caller owns the returned buffer, confirm against sbuf_done().
 */
132 char *msg_hdrdec(char *hdr
)
136 msg_hdrdec2(sb
, hdr
, strchr(hdr
, '\0'));
137 return sbuf_done(sb
);
140 /* decoding mime messages */
/* first dimension of bound[][]: number of boundary strings that can be kept */
142 #define MAXPARTS (1 << 3)
/* second dimension of bound[][]: bytes available for one boundary string */
143 #define BOUNDLEN (1 << 7)
/*
 * Single-bit flags; read_body() tests TYPE_TXT in its type argument.
 * NOTE(review): where TYPE_MPART and TYPE_ETC are set or tested is not
 * visible in this listing.
 */
145 #define TYPE_TXT 0x01
146 #define TYPE_MPART 0x02
147 #define TYPE_ETC 0x04
/*
 * Boundary strings, one row per multipart level: read_boundary() writes
 * row [depth], is_bound() and read_bound() read row [depth - 1].
 */
155 char bound
[MAXPARTS
][BOUNDLEN
];
/*
 * Append the bytes from m->src up to (not including) s to dst.
 *
 * NOTE(review): presumably m->src is advanced to s afterwards; that
 * statement is not visible in this listing.
 */
160 static void copy_till(struct mime
*m
, struct sbuf
*dst
, char *s
)
162 int len
= s
- m
->src
;
163 sbuf_mem(dst
, m
->src
, len
);
/*
 * Extract the boundary parameter value found between s and hdrend and
 * store it in m->bound[m->depth].
 *
 * The value starts after the first '='.  Two ways of finding its end e are
 * visible: a memchr() for '"', and a scan that stops at whitespace, ';' or
 * hdrend.  The value is copied to bound + 2 and NUL-terminated.
 *
 * NOTE(review): the first two bytes of bound are set on lines not visible
 * here (presumably to "--"), as are the NULL checks on s and e and any
 * change to m->depth.
 * NOTE(review): no check that e - s + 3 fits in BOUNDLEN is visible before
 * the memcpy(); a long boundary in a message could overflow the row.
 * NOTE(review): isspace(*e) receives a plain char; other calls in this file
 * cast to unsigned char first.
 */
167 static void read_boundary(struct mime
*m
, char *s
, char *hdrend
)
/* destination row for the current nesting level */
169 char *bound
= m
->bound
[m
->depth
];
171 s
= memchr(s
, '=', hdrend
- s
);
177 e
= memchr(s
, '"', hdrend
- s
);
180 while (e
< hdrend
&& !isspace(*e
) && *e
!= ';')
/* the value goes after the two-byte prefix, followed by a NUL */
187 memcpy(bound
+ 2, s
, e
- s
);
188 bound
[e
- s
+ 2] = '\0';
/*
 * Recognise the charset parameter value found between s and hdrend.
 *
 * After moving s to the first '=', the text is compared with the quoted
 * forms ="windows-1256" and ="iso-8859-6".  The guards s + 14 < hdrend and
 * s + 12 < hdrend make sure the 15 and 13 pattern bytes lie before hdrend.
 * A match on the first pattern returns the literal "windows-1256".
 *
 * NOTE(review): the results for the second pattern, for a missing '=' and
 * for no match are not visible in this listing.
 * NOTE(review): only the double-quoted spelling is matched by the visible
 * tests.  m is not used in the visible lines.
 */
192 static char *read_charset(struct mime
*m
, char *s
, char *hdrend
)
194 if (!(s
= memchr(s
, '=', hdrend
- s
)))
196 if (s
+ 14 < hdrend
&& startswith(s
, "=\"windows-1256\""))
197 return "windows-1256";
198 if (s
+ 12 < hdrend
&& startswith(s
, "=\"iso-8859-6\""))
/*
 * Move to the next ';'-separated field of a header value.
 *
 * The loop runs while s is non-NULL, below e and not at ';'.  Inside it a
 * memchr() looks for '"' in the remaining bytes.  The result is s + 1 (the
 * byte after the ';') when s is non-NULL and s + 2 < e, otherwise NULL.
 *
 * NOTE(review): the statements around the memchr() are not visible;
 * presumably they skip over a quoted string so that a ';' inside quotes is
 * not taken as a separator, confirm.
 */
203 static char *hdr_nextfield(char *s
, char *e
)
205 while (s
&& s
< e
&& *s
!= ';')
207 if ((s
= memchr(s
, '"', e
- s
)))
209 return s
&& s
+ 2 < e
? s
+ 1 : NULL
;
/*
 * Walk the header lines of the current part, writing them to dst and
 * extracting Content-Type and Content-Transfer-Encoding information.
 *
 * The loop stops at e or at a line that starts with '\n' (an empty line).
 * For each header, n is moved to the newline that ends it: a newline whose
 * next character is whitespace other than '\n' is treated as a continuation
 * and the search goes on.  Content-Type fields are compared with "text",
 * "multipart", "charset" (result of read_charset() stored in *charset) and
 * "boundary" (handled by read_boundary()).  Content-Transfer-Encoding
 * fields are compared with "quoted-printable" and "base64".  The header
 * text from s to n is written to dst through msg_hdrdec2().
 *
 * NOTE(review): the initial values of s and e, the effect of each match on
 * *type, and how s advances are on lines not visible in this listing.
 * NOTE(review): isspace(n[1]) and isspace(*key) in the second block receive
 * a plain char; the first block casts to unsigned char.
 * NOTE(review): n is NULL when no newline remains before e; confirm that a
 * line not shown here guards the msg_hdrdec2(dst, s, n) call.
 */
212 static void read_hdrs(struct mime
*m
, struct sbuf
*dst
, int *type
, char **charset
)
218 while (s
&& s
< e
&& *s
!= '\n') {
219 char *n
= memchr(s
, '\n', e
- s
);
/* a newline followed by non-newline whitespace continues the header */
220 while (n
&& n
+ 1 < e
&& n
[1] != '\n' && isspace(n
[1]))
221 n
= memchr(n
+ 1, '\n', e
- n
- 1);
224 if (startswith(s
, "Content-Type:")) {
/* startswith() matched, so the ':' searched for here exists */
225 char *key
= strchr(s
, ':') + 1;
226 char *hdrend
= s
+ hdrlen(s
, e
- s
);
228 while (key
< hdrend
&& isspace((unsigned char) *key
))
230 if (startswith(key
, "text"))
232 if (startswith(key
, "multipart"))
234 if (startswith(key
, "charset"))
235 *charset
= read_charset(m
, key
, hdrend
);
236 if (startswith(key
, "boundary"))
237 read_boundary(m
, key
, hdrend
);
238 key
= hdr_nextfield(key
, hdrend
);
241 if (startswith(s
, "Content-Transfer-Encoding:")) {
242 char *key
= strchr(s
, ':') + 1;
243 char *hdrend
= s
+ hdrlen(s
, e
- s
);
245 while (key
< hdrend
&& isspace(*key
))
247 if (startswith(key
, "quoted-printable"))
249 if (startswith(key
, "base64"))
251 key
= hdr_nextfield(key
, hdrend
);
/* emit the header itself, with encoded words decoded */
254 msg_hdrdec2(dst
, s
, n
);
/*
 * Return the result of startswith() for s against the boundary string of
 * the innermost multipart level, m->bound[m->depth - 1].  The index is only
 * valid when m->depth is at least 1.
 */
261 static int is_bound(struct mime
*m
, char *s
)
263 return startswith(s
, m
->bound
[m
->depth
- 1]);
/*
 * Consume a boundary line and copy it to dst.
 *
 * len is the length of the current boundary string.  The two characters
 * after the boundary are compared with "--".  s is then moved just past
 * the next '\n', or to m->end when there is none, and copy_till() appends
 * everything up to s to dst.
 *
 * NOTE(review): the initial value of s and the action taken for a boundary
 * followed by "--" are not visible in this listing; presumably that is the
 * closing delimiter and the nesting depth is reduced, confirm.
 */
266 static void read_bound(struct mime
*m
, struct sbuf
*dst
)
269 int len
= strlen(m
->bound
[m
->depth
- 1]);
270 if (s
[len
] == '-' && s
[len
+ 1] == '-')
/* skip to the start of the line after the boundary */
272 s
= memchr(s
, '\n', m
->end
- s
);
273 s
= s
? s
+ 1 : m
->end
;
274 copy_till(m
, dst
, s
);
/*
 * Return a position related to the next boundary line; read_body() uses the
 * result as the end of the current body.
 *
 * Only one statement is visible: s is moved to the next '\n' within the
 * bytes before e, and the branch is taken when no newline remains.
 *
 * NOTE(review): the loop, the boundary test (presumably is_bound()) and the
 * return values are not visible in this listing.
 */
277 static char *find_bound(struct mime
*m
)
284 if (!(s
= memchr(s
, '\n', e
- s
)))
/*
 * Write the body of the current part to dst, decoding it when it is text.
 *
 * The body ends at find_bound(m) when m->depth is nonzero and at m->end
 * otherwise.  When the TYPE_TXT bit is clear in type the bytes are copied
 * unchanged with copy_till().  Otherwise the visible calls are dec_qp()
 * (hdr flag 0), dec_b64() when the ENC_B64 bit is set, and a final
 * copy_till().
 *
 * NOTE(review): the condition guarding dec_qp(), the returns between the
 * branches and any update of m->src after decoding are not visible here.
 */
291 static void read_body(struct mime
*m
, struct sbuf
*dst
, int type
)
293 char *end
= m
->depth
? find_bound(m
) : m
->end
;
/* true when the TYPE_TXT bit is not set */
294 if (~type
& TYPE_TXT
) {
295 copy_till(m
, dst
, end
);
299 dec_qp(dst
, m
->src
, end
, 0);
303 if (type
& ENC_B64
) {
304 dec_b64(dst
, m
->src
, end
);
309 copy_till(m
, dst
, end
);
/*
 * Decode the MIME message msg of msglen bytes; the result is handed back
 * through *mod and its length through *modlen.
 *
 * The loop body runs when m.src still equals msg (the first pass) or while
 * m.depth is nonzero and m.src is below m.end.  Each pass reads one set of
 * headers and one body into a temporary sbuf and appends its contents to
 * dst through toutf8() with the charset reported by read_hdrs().
 *
 * NOTE(review): *modlen is sbuf_len(dst) - 1; presumably a terminating byte
 * is appended to dst on a line not visible here, confirm.
 * NOTE(review): the initialisation of m, type and charset, the release of
 * the per-pass sbuf, the handling of boundary lines and the return value
 * are not visible in this listing.
 */
312 int msg_demime(char *msg
, long msglen
, char **mod
, long *modlen
)
314 struct sbuf
*dst
= sbuf_make();
317 m
.end
= msg
+ msglen
;
319 while ((m
.depth
&& m
.src
< m
.end
) || m
.src
== msg
) {
/* per-part buffer, converted to UTF-8 once the part is complete */
322 struct sbuf
*sb
= sbuf_make();
323 read_hdrs(&m
, sb
, &type
, &charset
);
324 read_body(&m
, sb
, type
);
325 toutf8(charset
, dst
, sbuf_buf(sb
), sbuf_len(sb
));
331 *modlen
= sbuf_len(dst
) - 1;
332 *mod
= sbuf_done(dst
);
/*
 * Windows-1256 byte value -> UTF-8 string.  Only the indexes listed are
 * initialised; every other entry is NULL.
 *
 * NOTE(review): the literals at 0x9D, 0x9E, 0xFD and 0xFE look empty here.
 * In Windows-1256 these bytes are zero-width and directional marks, so the
 * strings may contain invisible characters; confirm with a hex dump before
 * editing these lines.
 */
336 static char *cp1256
[256] = {
337 [0x80] = "€", [0x81] = "پ", [0x82] = "‚", [0x83] = "ƒ",
338 [0x84] = "„", [0x85] = "…", [0x86] = "†", [0x87] = "‡",
339 [0x88] = "ˆ", [0x89] = "‰", [0x8A] = "ٹ", [0x8B] = "‹",
340 [0x8C] = "Œ", [0x8D] = "چ", [0x8E] = "ژ", [0x8F] = "ڈ",
341 [0x90] = "گ", [0x91] = "‘", [0x92] = "’", [0x93] = "“",
342 [0x94] = "”", [0x95] = "•", [0x96] = "–", [0x97] = "—",
343 [0x98] = "ک", [0x99] = "™", [0x9A] = "ڑ", [0x9B] = "›",
344 [0x9C] = "œ", [0x9D] = "", [0x9E] = "", [0x9F] = "ں",
345 [0xA1] = "،", [0xAA] = "ھ", [0xBA] = "؛", [0xBF] = "؟",
346 [0xC0] = "ہ", [0xC1] = "ء", [0xC2] = "آ", [0xC3] = "أ",
347 [0xC4] = "ؤ", [0xC5] = "إ", [0xC6] = "ئ", [0xC7] = "ا",
348 [0xC8] = "ب", [0xC9] = "ة", [0xCA] = "ت", [0xCB] = "ث",
349 [0xCC] = "ج", [0xCD] = "ح", [0xCE] = "خ", [0xCF] = "د",
350 [0xD0] = "ذ", [0xD1] = "ر", [0xD2] = "ز", [0xD3] = "س",
351 [0xD4] = "ش", [0xD5] = "ص", [0xD6] = "ض", [0xD7] = "×",
352 [0xD8] = "ط", [0xD9] = "ظ", [0xDA] = "ع", [0xDB] = "غ",
353 [0xDC] = "ـ", [0xDD] = "ف", [0xDE] = "ق", [0xDF] = "ك",
354 [0xE0] = "à", [0xE1] = "ل", [0xE2] = "â", [0xE3] = "م",
355 [0xE4] = "ن", [0xE5] = "ه", [0xE6] = "و", [0xE7] = "ç",
356 [0xE8] = "è", [0xE9] = "é", [0xEA] = "ê", [0xEB] = "ë",
357 [0xEC] = "ى", [0xED] = "ي", [0xEE] = "î", [0xEF] = "ï",
358 [0xF0] = "ً", [0xF1] = "ٌ", [0xF2] = "ٍ", [0xF3] = "َ",
359 [0xF4] = "ô", [0xF5] = "ُ", [0xF6] = "ِ", [0xF7] = "÷",
360 [0xF8] = "ّ", [0xF9] = "ù", [0xFA] = "ْ", [0xFB] = "û",
361 [0xFC] = "ü", [0xFD] = "", [0xFE] = "", [0xFF] = "ے",
/*
 * ISO-8859-6 byte value -> UTF-8 string.  Only the indexes listed are
 * initialised; every other entry is NULL.
 */
364 static char *iso8859_6
[256] = {
365 [0xAC] = "،", [0xBB] = "؛", [0xBF] = "؟", [0xC1] = "ء",
366 [0xC2] = "آ", [0xC3] = "أ", [0xC4] = "ؤ", [0xC5] = "إ",
367 [0xC6] = "ئ", [0xC7] = "ا", [0xC8] = "ب", [0xC9] = "ة",
368 [0xCA] = "ت", [0xCB] = "ث", [0xCC] = "ج", [0xCD] = "ح",
369 [0xCE] = "خ", [0xCF] = "د", [0xD0] = "ذ", [0xD1] = "ر",
370 [0xD2] = "ز", [0xD3] = "س", [0xD4] = "ش", [0xD5] = "ص",
371 [0xD6] = "ض", [0xD7] = "ط", [0xD8] = "ظ", [0xD9] = "ع",
372 [0xDA] = "غ", [0xE0] = "ـ", [0xE1] = "ف", [0xE2] = "ق",
373 [0xE3] = "ك", [0xE4] = "ل", [0xE5] = "م", [0xE6] = "ن",
374 [0xE7] = "ه", [0xE8] = "و", [0xE9] = "ى", [0xEA] = "ي",
375 [0xEB] = "ً", [0xEC] = "ٌ", [0xED] = "ٍ", [0xEE] = "َ",
376 [0xEF] = "ُ", [0xF0] = "ِ", [0xF1] = "ّ", [0xF2] = "ْ",
/*
 * Append len bytes of src to dst, converting from charset where a table
 * exists.
 *
 * charset is compared with "windows-1256" and "iso-8859-6"; a NULL charset
 * matches neither.  The per-byte loop reads each byte as an unsigned value
 * c and appends the string map[c].  The last visible statement appends src
 * unchanged with sbuf_mem().
 *
 * NOTE(review): the assignments to map (presumably cp1256 and iso8859_6),
 * the handling of bytes whose table entry is NULL, and the condition that
 * separates the table loop from the plain copy are not visible here.
 */
379 static void toutf8(char *charset
, struct sbuf
*dst
, char *src
, int len
)
383 if (charset
&& !strcmp("windows-1256", charset
))
385 if (charset
&& !strcmp("iso-8859-6", charset
))
388 for (i
= 0; i
< len
; i
++) {
/* index the table with the unsigned byte value */
389 int c
= (unsigned char) src
[i
];
391 sbuf_str(dst
, map
[c
]);
396 sbuf_mem(dst
, src
, len
);