Don't return Variant& from Array functions
[hiphop-php.git] / hphp / runtime / ext / mailparse / mime.cpp
blobda828cbecd088f552067ea14e7f225db440c8f19
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/ext/mailparse/mime.h"
19 #include "hphp/runtime/ext/stream/ext_stream.h"
20 #include "hphp/runtime/base/array-init.h"
21 #include "hphp/runtime/base/builtin-functions.h"
22 #include "hphp/runtime/base/mem-file.h"
23 #include "hphp/runtime/base/runtime-error.h"
25 #define MAXLEVELS 20
26 #define MAXPARTS 300
28 namespace HPHP {
29 ///////////////////////////////////////////////////////////////////////////////
31 MimePart::MimeHeader::MimeHeader()
32 : m_empty(true) {
35 MimePart::MimeHeader::MimeHeader(const char *value)
36 : m_empty(false) {
37 assert(value);
38 m_attributes = Array::Create();
39 m_value = String(value, CopyString);
42 MimePart::MimeHeader::MimeHeader(php_rfc822_tokenized_t *toks)
43 : m_empty(false) {
44 int i, first_semi, next_semi, comments_before_semi, netscape_bug = 0;
45 String name_buf;
46 StringBuffer value_buf;
47 bool is_rfc2231_name = false;
48 char *check_name;
49 int charset_p = 0, prevcharset_p = 0;
50 bool namechanged = false, currentencoded = false;
52 m_attributes = Array::Create();
54 /* php_rfc822_print_tokens(toks); */
55 /* look for optional ; which separates optional attributes from the main
56 value */
57 for (first_semi = 2; first_semi < toks->ntokens; first_semi++)
58 if (toks->tokens[first_semi].token == ';') break;
60 m_value = String(php_rfc822_recombine_tokens
61 (toks, 2, first_semi - 2,
62 PHP_RFC822_RECOMBINE_STRTOLOWER |
63 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
65 if (first_semi < toks->ntokens) first_semi++;
67 /* Netscape Bug: Messenger sometimes omits the semi when wrapping the
68 * the header.
69 * That means we have to be even more clever than the spec says that
70 * we need to :-/
71 * */
72 while (first_semi < toks->ntokens) {
73 /* find the next ; */
74 comments_before_semi = 0;
75 for (next_semi = first_semi; next_semi < toks->ntokens; next_semi++) {
76 if (toks->tokens[next_semi].token == ';') break;
77 if (toks->tokens[next_semi].token == '(') comments_before_semi++;
80 i = first_semi;
81 if (i < next_semi) {
82 i++;
84 /* ignore comments */
85 while (i < next_semi && toks->tokens[i].token == '(') {
86 i++;
89 if (i < next_semi && toks->tokens[i].token == '=') {
90 /* Here, next_semi --> "name" and i --> "=", so skip "=" sign */
91 i++;
93 /* count those tokens; we expect "token = token" (3 tokens); if there
94 * are more than that, then something is quite possibly wrong
95 * - Netscape Bug! */
96 if (next_semi < toks->ntokens && toks->tokens[next_semi].token != ';'
97 && next_semi - first_semi - comments_before_semi > 3) {
98 next_semi = i + 1;
99 netscape_bug = 1;
102 String name(php_rfc822_recombine_tokens
103 (toks, first_semi, 1,
104 PHP_RFC822_RECOMBINE_STRTOLOWER|
105 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
106 String value(php_rfc822_recombine_tokens
107 (toks, i, next_semi - i,
108 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
110 /* support rfc2231 mime parameter value
112 * Parameter Value Continuations:
114 * Content-Type: message/external-body; access-type=URL;
115 * URL*0="ftp://";
116 * URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
118 * is semantically identical to
120 * Content-Type: message/external-body; access-type=URL;
121 * URL="ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
123 * Original rfc2231 support by IceWarp Ltd. <info@icewarp.com>
125 check_name = const_cast<char*>(strchr(name.data(), '*'));
126 if (check_name) {
127 currentencoded = true;
129 /* Is last char * - charset encoding */
130 charset_p = (name[name.size() -1] == '*');
132 /* Leave only attribute name without * */
133 *check_name = 0;
135 /* New item or continuous */
136 if (name_buf.isNull()) {
137 namechanged = false;
138 name_buf = name;
139 } else {
140 namechanged = (name_buf != name);
141 if (!namechanged) {
142 name.clear();
146 /* Check if name changed*/
147 if (!namechanged) {
149 /* Append string to buffer - check if to be encoded... */
150 rfc2231_to_mime(value_buf, (char*)value.data(), charset_p,
151 prevcharset_p);
153 /* Mark previous */
154 prevcharset_p = charset_p;
157 is_rfc2231_name = true;
160 /* Last item was encoded */
161 if (is_rfc2231_name) {
162 /* Name not null and name differs with new name*/
163 if (!name.empty() && name_buf != name) {
164 /* Finalize packet */
165 rfc2231_to_mime(value_buf, NULL, 0, prevcharset_p);
167 m_attributes.set(name_buf, value_buf.detach());
168 value_buf.clear();
170 prevcharset_p = 0;
171 is_rfc2231_name = false;
172 name_buf.clear();
174 /* New non encoded name*/
175 if (!currentencoded) {
176 /* Add string*/
177 m_attributes.set(name, value);
178 } else { /* Encoded name changed*/
179 if (namechanged) {
180 /* Append string to buffer - check if to be encoded... */
181 rfc2231_to_mime(value_buf, (char*)value.data(), charset_p,
182 prevcharset_p);
184 /* Mark */
185 is_rfc2231_name = true;
186 name_buf = name;
187 prevcharset_p = charset_p;
191 namechanged = false;
193 } else {
194 m_attributes.set(name, value);
199 if (next_semi < toks->ntokens && !netscape_bug) {
200 next_semi++;
203 first_semi = next_semi;
204 netscape_bug = 0;
207 if (is_rfc2231_name) {
208 /* Finalize packet */
209 rfc2231_to_mime(value_buf, NULL, 0, prevcharset_p);
210 m_attributes.set(name_buf, value_buf.detach());
214 void MimePart::MimeHeader::clear() {
215 m_empty = true;
216 m_value.clear();
217 m_attributes.reset();
220 Variant MimePart::MimeHeader::get(const String& attrname) {
221 return m_attributes[attrname];
224 void MimePart::MimeHeader::getAll(Array &ret, const String& valuelabel,
225 const String& attrprefix) {
226 for (ArrayIter iter(m_attributes); iter; ++iter) {
227 ret.set(attrprefix + iter.first().toString(), iter.second());
230 /* do this last so that a bogus set of headers like this:
231 * Content-Type: multipart/related;
232 * boundary="----=_NextPart_00_0017_01C091F4.1B5EF6B0";
233 * type="text/html"
235 * doesn't overwrite content-type with the type="text/html"
236 * value.
237 * */
238 ret.set(valuelabel, m_value);
241 void MimePart::MimeHeader::rfc2231_to_mime(StringBuffer &value_buf,
242 char* value,
243 int charset_p, int prevcharset_p) {
244 char *strp, *startofvalue = NULL;
245 int quotes = 0;
247 /* Process string, get positions and replace */
248 /* Set to start of buffer*/
249 if (charset_p) {
251 /* Previous charset already set so only convert %nn to =nn*/
252 if (prevcharset_p) quotes=2;
254 strp = value;
255 while (*strp) {
257 /* Quote handling*/
258 if (*strp == '\'') {
259 if (quotes <= 1) {
261 /* End of charset*/
262 if (quotes == 0) {
263 *strp=0;
264 } else {
265 startofvalue = strp+1;
268 quotes++;
270 } else {
271 /* Replace % with = - quoted printable*/
272 if (*strp == '%' && quotes==2) {
273 *strp = '=';
276 strp++;
280 /* If first encoded token*/
281 if (charset_p && !prevcharset_p && startofvalue) {
282 value_buf.append("=?");
283 value_buf.append(value);
284 value_buf.append("?Q?");
285 value_buf.append(startofvalue);
288 /* If last encoded token*/
289 if (prevcharset_p && !charset_p) {
290 value_buf.append("?=");
293 /* Append value*/
294 if ((!charset_p || (prevcharset_p && charset_p)) && value) {
295 value_buf.append(value);
299 ///////////////////////////////////////////////////////////////////////////////
301 MimePart::MimePart()
302 : m_startpos(0), m_endpos(0), m_bodystart(0), m_bodyend(0),
303 m_nlines(0), m_nbodylines(0) {
304 m_headers = Array::Create();
306 /* begin in header parsing mode */
307 m_parsedata.in_header = true;
308 m_parsedata.is_dummy = false;
309 m_parsedata.completed = false;
312 ///////////////////////////////////////////////////////////////////////////////
313 // enumeration
315 bool MimePart::enumeratePartsImpl(Enumerator *top, Enumerator **child,
316 PFUNC_ENUMERATOR callback, void *ptr) {
317 *child = NULL;
318 if (!(this->*callback)(top, ptr)) return false;
320 Enumerator next;
321 *child = &next;
322 next.id = 1;
323 if (!strncasecmp(m_content_type.m_value.data(), "multipart/", 10)) {
324 next.id = 0;
327 for (ArrayIter iter(m_children); iter; ++iter) {
328 if (next.id) {
329 auto childpart = cast<MimePart>(iter.second());
330 if (!childpart->enumeratePartsImpl(top, &next.next, callback, ptr)) {
331 return false;
334 next.id++;
336 return true;
339 void MimePart::enumerateParts(PFUNC_ENUMERATOR callback, void *ptr) {
340 Enumerator top;
341 top.id = 1;
342 enumeratePartsImpl(&top, &top.next, callback, ptr);
345 struct find_part_struct {
346 const char *searchfor;
347 MimePart *foundpart;
350 bool MimePart::getStructure(Enumerator *id, void *ptr) {
351 char intbuf[16];
352 int len, i = 0;
353 int buf_size = 1024;
354 char *buf = (char*)malloc(buf_size);
355 buf[0] = '\0';
356 while (id && i < buf_size) {
357 sprintf(intbuf, "%d", id->id);
358 len = strlen(intbuf);
359 if (len > (buf_size-i)) {
360 raise_warning("too many nested sections in message");
361 free(buf);
362 return false;
364 if ((i + len + 1) >= buf_size) {
365 buf_size = buf_size << 1;
366 buf = (char*)realloc(buf, buf_size);
367 if (!buf) {
368 raise_fatal_error(
369 folly::sformat("The structure buffer has been exceeded "
370 "({}). Please try decreasing the nesting "
371 "depth of messages and report this to the "
372 "developers.", buf_size).c_str());
375 sprintf(&buf[i], "%s%c", intbuf, id->next ? '.' : '\0');
376 i += len + (id->next ? 1 : 0);
377 id = id->next;
379 ((Array*)ptr)->append(String(buf, AttachString));
380 return true;
383 Array MimePart::getStructure() {
384 Array ret = Array::Create();
385 enumerateParts(&MimePart::getStructure, &ret);
386 return ret;
389 bool MimePart::findPart(Enumerator *id, void *ptr) {
390 struct find_part_struct *find = (find_part_struct *)ptr;
391 const unsigned char *num = (const unsigned char*)find->searchfor;
392 unsigned int n;
394 while (id) {
395 if (!isdigit((int)*num)) return true;
396 /* convert from decimal to int */
397 n = 0;
398 while (isdigit((int)*num)) n = (n * 10) + (*num++ - '0');
399 if (*num) {
400 if (*num != '.') return true;
401 num++;
403 if (n != (unsigned int)id->id) return true;
404 id = id->next;
406 if (*num == 0) find->foundpart = this;
407 return true;
410 Resource MimePart::findByName(const char *name) {
411 struct find_part_struct find;
412 find.searchfor = name;
413 find.foundpart = NULL;
414 enumerateParts(&MimePart::findPart, &find);
415 return Resource{find.foundpart};
418 static int filter_into_work_buffer(int c, void *dat) {
419 MimePart *part = (MimePart*)dat;
420 return part->filter(c);
423 int MimePart::filter(int c) {
424 char buf[] = {(char)c, '\0'};
425 m_parsedata.workbuf += buf;
426 if (m_parsedata.workbuf.size() >= 4096) {
427 (this->*m_extract_func)(m_parsedata.workbuf);
428 m_parsedata.workbuf.clear();
430 return c;
433 void MimePart::decoderPrepare(bool do_decode) {
434 enum mbfl_no_encoding from = mbfl_no_encoding_8bit;
436 if (do_decode && !m_content_transfer_encoding.empty()) {
437 from = mbfl_name2no_encoding(m_content_transfer_encoding.data());
438 if (from == mbfl_no_encoding_invalid) {
439 if (strcasecmp("binary", m_content_transfer_encoding.data()) != 0) {
440 raise_warning("mbstring doesn't know how to decode %s "
441 "transfer encoding!",
442 m_content_transfer_encoding.data());
444 from = mbfl_no_encoding_8bit;
448 m_parsedata.workbuf.clear();
450 if (do_decode) {
451 if (from == mbfl_no_encoding_8bit || from == mbfl_no_encoding_7bit) {
452 m_extract_filter = NULL;
453 } else {
454 m_extract_filter = mbfl_convert_filter_new(from, mbfl_no_encoding_8bit,
455 filter_into_work_buffer,
456 NULL, this);
461 void MimePart::decoderFinish() {
462 if (m_extract_filter) {
463 mbfl_convert_filter_flush(m_extract_filter);
464 mbfl_convert_filter_delete(m_extract_filter);
466 if (m_extract_func && !m_parsedata.workbuf.empty()) {
467 (this->*m_extract_func)(m_parsedata.workbuf);
468 m_parsedata.workbuf.clear();
472 void MimePart::decoderFeed(const String& str) {
473 if (!str.empty()) {
474 if (m_extract_filter) {
475 for (int i = 0; i < str.size(); i++) {
476 if (mbfl_convert_filter_feed(str[i], m_extract_filter) < 0) {
477 raise_warning("filter conversion failed. Input message is "
478 "probably incorrectly encoded");
479 return;
482 } else {
483 (this->*m_extract_func)(str);
488 const StaticString s_1_pt_0("1.0");
490 bool MimePart::isVersion1() {
491 return m_mime_version == s_1_pt_0 || m_parent;
494 const StaticString
495 s_headers("headers"),
496 s_starting_pos("starting-pos"),
497 s_starting_pos_body("starting-pos-body"),
498 s_ending_pos("ending-pos"),
499 s_ending_pos_body("ending-pos-body"),
500 s_line_count("line-count"),
501 s_body_line_count("body-line-count"),
502 s_charset("charset"),
503 s_transfer_encoding("transfer-encoding"),
504 s_content_type("content-type"),
505 s_content_("content-"),
506 s_text_plain_error("text/plain, (error)"),
507 s_content_disposition("content-disposition"),
508 s_disposition_("disposition-"),
509 s_content_location("content-location"),
510 s_content_base("content-base"),
511 s_content_boundary("content-boundary"),
512 s_content_id("content-id"),
513 s_content_description("content-description"),
514 s_content_language("content-language"),
515 s_content_md5("content-md5"),
516 s_boundary("boundary"),
517 s_to("to"),
518 s_cc("cc"),
519 s_mime_version("mime-version"),
520 s_content_transfer_encoding("content-transfer-encoding");
522 Variant MimePart::getPartData() {
523 Array ret = Array::Create();
525 ret.set(s_headers, m_headers);
527 ret.set(s_starting_pos, m_startpos);
528 ret.set(s_starting_pos_body, m_bodystart);
529 if (!m_parent) {
530 ret.set(s_ending_pos, m_endpos);
531 ret.set(s_ending_pos_body, m_bodyend);
532 ret.set(s_line_count, m_nlines);
533 ret.set(s_body_line_count, m_nbodylines);
534 } else {
535 ret.set(s_ending_pos, m_bodyend);
536 ret.set(s_ending_pos_body, m_bodyend);
537 ret.set(s_line_count, m_nlines ? m_nlines - 1 : m_nlines);
538 ret.set(s_body_line_count, m_nbodylines ? m_nbodylines - 1 : m_nbodylines);
541 if (!m_charset.empty()) {
542 ret.set(s_charset, m_charset);
543 } else {
544 ret.set(s_charset, "us-ascii");
546 if (!m_content_transfer_encoding.empty()) {
547 ret.set(s_transfer_encoding, m_content_transfer_encoding);
548 } else {
549 ret.set(s_transfer_encoding, "8bit");
551 if (!m_content_type.empty()) {
552 m_content_type.getAll(ret, s_content_type, s_content_);
553 } else {
554 ret.set(s_content_type, s_text_plain_error);
557 if (!m_content_disposition.empty()) {
558 m_content_disposition.getAll(ret, s_content_disposition, s_disposition_);
561 if (!m_content_location.empty()) {
562 ret.set(s_content_location, m_content_location);
564 if (!m_content_base.empty()) {
565 ret.set(s_content_base, m_content_base);
566 } else {
567 ret.set(s_content_base, "/");
570 if (!m_boundary.empty()) {
571 ret.set(s_content_boundary, m_boundary);
574 /* extract the address part of the content-id only */
575 Variant contentId = m_headers[s_content_id];
576 if (!contentId.isNull()) {
577 php_rfc822_tokenized_t *toks =
578 php_mailparse_rfc822_tokenize((const char*)contentId.toString().data(),
579 true);
580 php_rfc822_addresses_t *addrs =
581 php_rfc822_parse_address_tokens(toks);
582 if (addrs->naddrs > 0) {
583 ret.set(s_content_id, String(addrs->addrs[0].address, CopyString));
585 php_rfc822_free_addresses(addrs);
586 php_rfc822_tokenize_free(toks);
589 auto copyHeader = [&](const Variant& key) {
590 if (m_headers.exists(key)) ret.set(key, m_headers[key]);
592 copyHeader(s_content_description);
593 copyHeader(s_content_language);
594 copyHeader(s_content_md5);
595 return ret;
598 bool MimePart::parse(const char *buf, int bufsize) {
599 while (bufsize > 0) {
600 /* look for EOL */
601 int len = 0;
602 for (; len < bufsize; len++) {
603 if (buf[len] == '\n') break;
605 if (len < bufsize && buf[len] == '\n') {
606 ++len;
607 m_parsedata.workbuf += String(buf, len, CopyString);
608 ProcessLine(req::ptr<MimePart>(this), m_parsedata.workbuf);
609 m_parsedata.workbuf.clear();
610 } else {
611 m_parsedata.workbuf += String(buf, len, CopyString);
614 buf += len;
615 bufsize -= len;
617 return true;
620 req::ptr<MimePart> MimePart::createChild(int startpos, bool inherit) {
621 auto child = req::make<MimePart>();
622 m_parsedata.lastpart = child;
623 child->m_parent = this;
625 m_children.append(Resource(child));
626 child->m_startpos = child->m_endpos = child->m_bodystart =
627 child->m_bodyend = startpos;
629 if (inherit) {
630 child->m_content_transfer_encoding = m_content_transfer_encoding;
631 child->m_charset = m_charset;
633 return child;
636 bool MimePart::processHeader() {
637 if (m_parsedata.headerbuf.empty()) {
638 return true;
641 /* parse the header line */
642 php_rfc822_tokenized_t *toks =
643 php_mailparse_rfc822_tokenize(m_parsedata.headerbuf.data(), 0);
645 /* valid headers consist of at least three tokens,
646 with the first being a string and the second token being a ':' */
647 if (toks->ntokens < 2 || toks->tokens[0].token != 0 ||
648 toks->tokens[1].token != ':') {
649 m_parsedata.headerbuf.clear();
650 php_rfc822_tokenize_free(toks);
651 return false;
654 /* get a lower-case version of the first token */
655 String header_key(php_rfc822_recombine_tokens
656 (toks, 0, 1,
657 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS|
658 PHP_RFC822_RECOMBINE_STRTOLOWER), AttachString);
660 const char *header_val = strchr(m_parsedata.headerbuf.data(), ':');
661 String header_val_stripped(php_rfc822_recombine_tokens
662 (toks, 2, toks->ntokens-2,
663 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS|
664 PHP_RFC822_RECOMBINE_STRTOLOWER), AttachString);
666 if (header_val) {
667 header_val++;
668 while (isspace(*header_val)) {
669 header_val++;
672 /* add the header to the hash.
673 * join multiple To: or Cc: lines together */
674 if ((header_key == s_to || header_key == s_cc) &&
675 m_headers.exists(header_key)) {
676 String newstr = m_headers[header_key].toString();
677 newstr += ", ";
678 newstr += header_val;
679 m_headers.set(header_key, newstr);
680 } else {
681 if (m_headers.exists(header_key)) {
682 auto const zheaderval = m_headers.lvalAt(header_key).unboxed();
683 if (isArrayLikeType(zheaderval.type())) {
684 asArrRef(zheaderval).append(String(header_val, CopyString));
685 } else {
686 // Create a nested array if there is more than one of the same header
687 Array zarr = Array::Create();
688 zarr.append(zheaderval.tv());
689 zarr.append(String(header_val, CopyString));
690 m_headers.set(header_key, zarr);
692 } else {
693 m_headers.set(header_key, String(header_val, CopyString));
697 /* if it is useful, keep a pointer to it in the mime part */
698 if (header_key == s_mime_version) {
699 m_mime_version = header_val_stripped;
700 } else if (header_key == s_content_location) {
701 m_content_location =
702 String(php_rfc822_recombine_tokens
703 (toks, 2, toks->ntokens-2,
704 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
705 } else if (header_key == s_content_base) {
706 m_content_base =
707 String(php_rfc822_recombine_tokens
708 (toks, 2, toks->ntokens-2,
709 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
710 } else if (header_key == s_content_transfer_encoding) {
711 m_content_transfer_encoding = header_val_stripped;
712 } else if (header_key == s_content_type) {
713 m_content_type = MimeHeader(toks);
714 Variant boundary = m_content_type.get(s_boundary);
715 if (!boundary.isNull()) {
716 m_boundary = boundary.toString();
718 Variant charset = m_content_type.get(s_charset);
719 if (!charset.isNull()) {
720 m_charset = charset.toString();
722 } else if (header_key == s_content_disposition) {
723 m_content_disposition = MimeHeader(toks);
727 php_rfc822_tokenize_free(toks);
728 m_parsedata.headerbuf.clear();
729 return true;
732 bool MimePart::ProcessLine(req::ptr<MimePart> workpart, const String& line) {
733 /* sanity check */
734 if (workpart->m_children.size() > MAXPARTS) {
735 raise_warning("MIME message too complex");
736 return false;
739 const char *c = line.data();
741 /* strip trailing \r\n -- we always have a trailing \n */
742 int origcount = line.size();
743 int linelen = origcount - 1;
744 if (linelen && c[linelen-1] == '\r') {
745 --linelen;
748 /* Discover which part we were last working on */
749 while (workpart->m_parsedata.lastpart) {
750 auto lastpart = workpart->m_parsedata.lastpart;
752 if (lastpart->m_parsedata.completed) {
753 UpdatePositions(workpart, workpart->m_endpos + origcount,
754 workpart->m_endpos + origcount, 1);
755 return true;
757 if (workpart->m_boundary.empty() || workpart->m_parsedata.in_header) {
758 workpart = lastpart;
759 continue;
761 int bound_len = workpart->m_boundary.size();
763 /* Look for a boundary */
764 if (c[0] == '-' && c[1] == '-' && linelen >= 2+bound_len &&
765 strncasecmp(workpart->m_boundary.data(), c+2, bound_len) == 0) {
767 /* is it the final boundary ? */
768 if (linelen >= 4 + bound_len && strncmp(c+2+bound_len, "--", 2) == 0) {
769 lastpart->m_parsedata.completed = true;
770 UpdatePositions(workpart, workpart->m_endpos + origcount,
771 workpart->m_endpos + origcount, 1);
772 return true;
775 auto newpart =
776 workpart->createChild(workpart->m_endpos + origcount, true);
777 UpdatePositions(workpart, workpart->m_endpos + origcount,
778 workpart->m_endpos + linelen, 1);
779 newpart->m_mime_version = workpart->m_mime_version;
780 newpart->m_parsedata.in_header = true;
781 return true;
783 workpart = lastpart;
786 if (!workpart->m_parsedata.in_header) {
787 if (!workpart->m_parsedata.completed && !workpart->m_parsedata.lastpart) {
788 /* update the body/part end positions.
789 * For multipart messages, the final newline belongs to the boundary.
790 * Otherwise it belongs to the body
791 * */
792 if (workpart->m_parent &&
793 strncasecmp(workpart->getParent()->m_content_type.m_value.data(),
794 "multipart/", 10) == 0) {
795 UpdatePositions(workpart, workpart->m_endpos + origcount,
796 workpart->m_endpos + linelen, true);
797 } else {
798 UpdatePositions(workpart, workpart->m_endpos + origcount,
799 workpart->m_endpos + origcount, true);
802 } else {
804 if (linelen > 0) {
805 UpdatePositions(workpart, workpart->m_endpos + origcount,
806 workpart->m_endpos + linelen, true);
808 if (*c == ' ' || *c == '\t') {
809 /* This doesn't technically confirm to rfc2822, as we're replacing
810 \t with \s, but this seems to fix cases where clients incorrectly
811 fold by inserting a \t character.
813 workpart->m_parsedata.headerbuf += " ";
814 c++; linelen--;
815 } else {
816 workpart->processHeader();
818 /* save header for possible continuation */
819 workpart->m_parsedata.headerbuf += String(c, linelen, CopyString);
821 } else {
822 /* end of headers */
823 workpart->processHeader();
825 /* start of body */
826 workpart->m_parsedata.in_header = false;
827 workpart->m_bodystart = workpart->m_endpos + origcount;
828 UpdatePositions(workpart, workpart->m_bodystart, workpart->m_bodystart,
829 true);
830 --workpart->m_nbodylines;
832 /* some broken mailers include the content-type header but not a
833 * mime-version header.
834 * Let's relax and pretend they said they were mime 1.0 compatible */
835 if (workpart->m_mime_version.empty() &&
836 !workpart->m_content_type.empty()) {
837 workpart->m_mime_version = "1.0";
840 if (!workpart->isVersion1()) {
841 /* if we don't understand the MIME version, discard the content-type
842 and boundary */
843 workpart->m_content_disposition.clear();
844 workpart->m_boundary.clear();
845 workpart->m_content_type.clear();
846 workpart->m_content_type = MimeHeader("text/plain");
848 /* if there is no content type, default to text/plain, but use
849 multipart/digest when in a multipart/rfc822 message */
850 if (workpart->isVersion1() && workpart->m_content_type.empty()) {
851 char *def_type = "text/plain";
852 if (workpart->m_parent &&
853 strcasecmp(workpart->getParent()->m_content_type.m_value.data(),
854 "multipart/digest") == 0) {
855 def_type = "message/rfc822";
857 workpart->m_content_type = MimeHeader(def_type);
860 /* if no charset had previously been set, either through inheritance
861 * or by an explicit content-type header, default to us-ascii */
862 if (workpart->m_charset.isNull()) {
863 workpart->m_charset = "us-ascii";
866 if (strcasecmp(workpart->m_content_type.m_value.data(),
867 "message/rfc822") == 0) {
868 workpart = workpart->createChild(workpart->m_bodystart, false);
869 workpart->m_parsedata.in_header = true;
870 return true;
874 /* create a section for the preamble that precedes the first boundary */
875 if (!workpart->m_boundary.empty()) {
876 workpart = workpart->createChild(workpart->m_bodystart, true);
877 workpart->m_parsedata.in_header = false;
878 workpart->m_parsedata.is_dummy = true;
879 return true;
882 return true;
887 return true;
890 void MimePart::UpdatePositions(req::ptr<MimePart> part, int newendpos,
891 int newbodyend, int deltanlines) {
892 while (part) {
893 part->m_endpos = newendpos;
894 part->m_bodyend = newbodyend;
895 part->m_nlines += deltanlines;
896 if (!part->m_parsedata.in_header) {
897 part->m_nbodylines += deltanlines;
899 part = part->m_parent;
903 Variant MimePart::extract(const Variant& filename, const Variant& callbackfunc, int decode,
904 bool isfile) {
905 /* filename can be a filename or a stream */
906 req::ptr<File> file;
907 if (filename.isResource()) {
908 file = cast<File>(filename);
909 } else if (isfile) {
910 file = File::Open(filename.toString(), "rb");
911 } else {
912 /* filename is the actual data */
913 String data = filename.toString();
914 file = req::make<MemFile>(data.data(), data.size());
917 if (!file) {
918 return false;
921 m_extract_context = callbackfunc;
922 if (callbackfunc.isString() && callbackfunc.toString().empty()) {
923 m_extract_func = &MimePart::outputToStdout;
924 } else {
925 if (callbackfunc.isNull()) {
926 m_extract_func = &MimePart::outputToString;
927 } else if (callbackfunc.isResource()) {
928 m_extract_func = &MimePart::outputToFile;
929 } else {
930 m_extract_func = &MimePart::callUserFunc;
934 if (extractImpl(decode, file)) {
935 if (callbackfunc.isNull()) {
936 return m_extract_context;
938 if (callbackfunc.isResource()) {
939 return HHVM_FN(stream_get_contents)(callbackfunc.toResource());
941 return true;
943 return init_null();
946 int MimePart::extractImpl(int decode, req::ptr<File> src) {
947 /* figure out where the message part starts/ends */
948 int start_pos = (decode & DecodeNoHeaders) ? m_bodystart : m_startpos;
949 int end = (decode & DecodeNoBody) ?
950 m_bodystart : (m_parent ? m_bodyend : m_endpos);
952 decoderPrepare(decode & Decode8Bit);
954 if (!src->seek(start_pos)) {
955 raise_warning("unable to seek to section start");
956 decoderFinish();
957 return false;
960 while (start_pos < end) {
961 int n = 4095;
962 if (n > end - start_pos) n = end - start_pos;
963 String str = src->read(n);
964 if (str.empty()) {
965 raise_warning("error reading from file at offset %d", start_pos);
966 decoderFinish();
967 return false;
969 decoderFeed(str);
970 start_pos += str.size();
972 decoderFinish();
973 return true;
976 void MimePart::callUserFunc(const String& s) {
977 vm_call_user_func(m_extract_context, make_packed_array(s));
980 void MimePart::outputToStdout(const String& s) {
981 g_context->write(s);
984 void MimePart::outputToFile(const String& s) {
985 cast<File>(m_extract_context)->write(s);
988 void MimePart::outputToString(const String& s) {
989 m_extract_context = m_extract_context.toString() + s;
992 ///////////////////////////////////////////////////////////////////////////////