2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/ext/mailparse/mime.h"
19 #include "hphp/runtime/ext/stream/ext_stream.h"
20 #include "hphp/runtime/base/array-init.h"
21 #include "hphp/runtime/base/builtin-functions.h"
22 #include "hphp/runtime/base/mem-file.h"
23 #include "hphp/runtime/base/runtime-error.h"
29 ///////////////////////////////////////////////////////////////////////////////
31 MimePart::MimeHeader::MimeHeader()
// Builds a header from a plain C string: the attribute array starts out
// empty and m_value is a String constructed from `value` with the
// CopyString tag.
// NOTE(review): the lines between the signature and the first assignment
// are not visible in this listing; any initializer list is undocumented here.
35 MimePart::MimeHeader::MimeHeader(const char *value
)
38 m_attributes
= Array::Create();
39 m_value
= String(value
, CopyString
);
42 MimePart::MimeHeader::MimeHeader(php_rfc822_tokenized_t
*toks
)
44 int i
, first_semi
, next_semi
, comments_before_semi
, netscape_bug
= 0;
46 StringBuffer value_buf
;
47 bool is_rfc2231_name
= false;
49 int charset_p
= 0, prevcharset_p
= 0;
50 bool namechanged
= false, currentencoded
= false;
52 m_attributes
= Array::Create();
54 /* php_rfc822_print_tokens(toks); */
55 /* look for optional ; which separates optional attributes from the main
57 for (first_semi
= 2; first_semi
< toks
->ntokens
; first_semi
++)
58 if (toks
->tokens
[first_semi
].token
== ';') break;
60 m_value
= String(php_rfc822_recombine_tokens
61 (toks
, 2, first_semi
- 2,
62 PHP_RFC822_RECOMBINE_STRTOLOWER
|
63 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS
), AttachString
);
65 if (first_semi
< toks
->ntokens
) first_semi
++;
67 /* Netscape Bug: Messenger sometimes omits the semi when wrapping the
69 * That means we have to be even more clever than the spec says that
72 while (first_semi
< toks
->ntokens
) {
74 comments_before_semi
= 0;
75 for (next_semi
= first_semi
; next_semi
< toks
->ntokens
; next_semi
++) {
76 if (toks
->tokens
[next_semi
].token
== ';') break;
77 if (toks
->tokens
[next_semi
].token
== '(') comments_before_semi
++;
85 while (i
< next_semi
&& toks
->tokens
[i
].token
== '(') {
89 if (i
< next_semi
&& toks
->tokens
[i
].token
== '=') {
90 /* Here, first_semi --> "name" and i --> "=", so skip "=" sign */
93 /* count those tokens; we expect "token = token" (3 tokens); if there
94 * are more than that, then something is quite possibly wrong
96 if (next_semi
< toks
->ntokens
&& toks
->tokens
[next_semi
].token
!= ';'
97 && next_semi
- first_semi
- comments_before_semi
> 3) {
102 String
name(php_rfc822_recombine_tokens
103 (toks
, first_semi
, 1,
104 PHP_RFC822_RECOMBINE_STRTOLOWER
|
105 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS
), AttachString
);
106 String
value(php_rfc822_recombine_tokens
107 (toks
, i
, next_semi
- i
,
108 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS
), AttachString
);
110 /* support rfc2231 mime parameter value
112 * Parameter Value Continuations:
114 * Content-Type: message/external-body; access-type=URL;
116 * URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
118 * is semantically identical to
120 * Content-Type: message/external-body; access-type=URL;
121 * URL="ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
123 * Original rfc2231 support by IceWarp Ltd. <info@icewarp.com>
125 check_name
= const_cast<char*>(strchr(name
.data(), '*'));
127 currentencoded
= true;
129 /* Is last char * - charset encoding */
130 charset_p
= (name
[name
.size() -1] == '*');
132 /* Leave only attribute name without * */
135 /* New item or continuous */
136 if (name_buf
.isNull()) {
140 namechanged
= (name_buf
!= name
);
146 /* Check if name changed*/
149 /* Append string to buffer - check if to be encoded... */
150 rfc2231_to_mime(value_buf
, (char*)value
.data(), charset_p
,
154 prevcharset_p
= charset_p
;
157 is_rfc2231_name
= true;
160 /* Last item was encoded */
161 if (is_rfc2231_name
) {
162 /* Name not null and name differs with new name*/
163 if (!name
.empty() && name_buf
!= name
) {
164 /* Finalize packet */
165 rfc2231_to_mime(value_buf
, NULL
, 0, prevcharset_p
);
167 m_attributes
.set(name_buf
, value_buf
.detach());
171 is_rfc2231_name
= false;
174 /* New non encoded name*/
175 if (!currentencoded
) {
177 m_attributes
.set(name
, value
);
178 } else { /* Encoded name changed*/
180 /* Append string to buffer - check if to be encoded... */
181 rfc2231_to_mime(value_buf
, (char*)value
.data(), charset_p
,
185 is_rfc2231_name
= true;
187 prevcharset_p
= charset_p
;
194 m_attributes
.set(name
, value
);
199 if (next_semi
< toks
->ntokens
&& !netscape_bug
) {
203 first_semi
= next_semi
;
207 if (is_rfc2231_name
) {
208 /* Finalize packet */
209 rfc2231_to_mime(value_buf
, NULL
, 0, prevcharset_p
);
210 m_attributes
.set(name_buf
, value_buf
.detach());
// Resets this header: the visible statement resets the attribute array.
// NOTE(review): the lines between the signature and this statement are not
// visible in this listing, so other members may be reset as well - confirm.
214 void MimePart::MimeHeader::clear() {
217 m_attributes
.reset();
// Returns the attribute stored under `attrname` in m_attributes.
// Callers in this file test the result with isNull() before using it.
220 Variant
MimePart::MimeHeader::get(const String
& attrname
) {
221 return m_attributes
[attrname
];
// Copies this header into `ret`.
//   ret         - destination array, modified in place
//   valuelabel  - key under which the main header value (m_value) is stored
//   attrprefix  - string prepended to every attribute name to form its key
// Every attribute is written first as ret[attrprefix + name]; the main value
// is written afterwards so that it is the last write to ret[valuelabel].
224 void MimePart::MimeHeader::getAll(Array
&ret
, const String
& valuelabel
,
225 const String
& attrprefix
) {
226 for (ArrayIter
iter(m_attributes
); iter
; ++iter
) {
227 ret
.set(attrprefix
+ iter
.first().toString(), iter
.second());
230 /* do this last so that a bogus set of headers like this:
231 * Content-Type: multipart/related;
232 * boundary="----=_NextPart_00_0017_01C091F4.1B5EF6B0";
235 * doesn't overwrite content-type with the type="text/html"
238 ret
.set(valuelabel
, m_value
);
241 void MimePart::MimeHeader::rfc2231_to_mime(StringBuffer
&value_buf
,
243 int charset_p
, int prevcharset_p
) {
244 char *strp
, *startofvalue
= NULL
;
247 /* Process string, get positions and replace */
248 /* Set to start of buffer*/
251 /* Previous charset already set so only convert %nn to =nn*/
252 if (prevcharset_p
) quotes
=2;
265 startofvalue
= strp
+1;
271 /* Replace % with = - quoted printable*/
272 if (*strp
== '%' && quotes
==2) {
280 /* If first encoded token*/
281 if (charset_p
&& !prevcharset_p
&& startofvalue
) {
282 value_buf
.append("=?");
283 value_buf
.append(value
);
284 value_buf
.append("?Q?");
285 value_buf
.append(startofvalue
);
288 /* If last encoded token*/
289 if (prevcharset_p
&& !charset_p
) {
290 value_buf
.append("?=");
294 if ((!charset_p
|| (prevcharset_p
&& charset_p
)) && value
) {
295 value_buf
.append(value
);
299 ///////////////////////////////////////////////////////////////////////////////
302 : m_startpos(0), m_endpos(0), m_bodystart(0), m_bodyend(0),
303 m_nlines(0), m_nbodylines(0) {
304 m_headers
= Array::Create();
306 /* begin in header parsing mode */
307 m_parsedata
.in_header
= true;
308 m_parsedata
.is_dummy
= false;
309 m_parsedata
.completed
= false;
312 ///////////////////////////////////////////////////////////////////////////////
315 bool MimePart::enumeratePartsImpl(Enumerator
*top
, Enumerator
**child
,
316 PFUNC_ENUMERATOR callback
, void *ptr
) {
318 if (!(this->*callback
)(top
, ptr
)) return false;
323 if (!strncasecmp(m_content_type
.m_value
.data(), "multipart/", 10)) {
327 for (ArrayIter
iter(m_children
); iter
; ++iter
) {
329 auto childpart
= cast
<MimePart
>(iter
.second());
330 if (!childpart
->enumeratePartsImpl(top
, &next
.next
, callback
, ptr
)) {
// Public entry point for walking the part tree: forwards `callback` and the
// opaque `ptr` to enumeratePartsImpl, passing `top` as the head of the
// enumerator chain and the address of `top.next` as the child link.
// NOTE(review): the declaration and initialisation of `top` are on lines
// that are not visible in this listing.
339 void MimePart::enumerateParts(PFUNC_ENUMERATOR callback
, void *ptr
) {
342 enumeratePartsImpl(&top
, &top
.next
, callback
, ptr
);
// State shared between findByName() and the findPart() callback.
// NOTE(review): findPart/findByName also use a `foundpart` member whose
// declaration is not visible in this listing.
345 struct find_part_struct
{
// Section name being searched for; findPart() reads it as dot-separated
// decimal numbers.
346 const char *searchfor
;
350 bool MimePart::getStructure(Enumerator
*id
, void *ptr
) {
354 char *buf
= (char*)malloc(buf_size
);
356 while (id
&& i
< buf_size
) {
357 sprintf(intbuf
, "%d", id
->id
);
358 len
= strlen(intbuf
);
359 if (len
> (buf_size
-i
)) {
360 raise_warning("too many nested sections in message");
364 if ((i
+ len
+ 1) >= buf_size
) {
365 buf_size
= buf_size
<< 1;
366 buf
= (char*)realloc(buf
, buf_size
);
369 folly::sformat("The structure buffer has been exceeded "
370 "({}). Please try decreasing the nesting "
371 "depth of messages and report this to the "
372 "developers.", buf_size
).c_str());
375 sprintf(&buf
[i
], "%s%c", intbuf
, id
->next
? '.' : '\0');
376 i
+= len
+ (id
->next
? 1 : 0);
379 ((Array
*)ptr
)->append(String(buf
, AttachString
));
// Collects the section names of this part tree: creates an empty array and
// runs enumerateParts() with the getStructure(Enumerator*, void*) callback,
// handing it the address of that array as the opaque pointer.
// NOTE(review): the return statement is not visible in this listing;
// presumably `ret` is returned.
383 Array
MimePart::getStructure() {
384 Array ret
= Array::Create();
385 enumerateParts(&MimePart::getStructure
, &ret
);
// Enumeration callback used by findByName(): compares the dotted decimal
// name in find->searchfor against the enumerator ids and records `this` in
// find->foundpart on a full match.
//   id  - enumerator chain describing the position of the current part
//   ptr - find_part_struct* supplied by findByName()
// Every visible exit returns true, which lets enumeratePartsImpl() carry on.
// NOTE(review): the declaration of `n` and the surrounding loop/branch lines
// are not visible in this listing.
389 bool MimePart::findPart(Enumerator
*id
, void *ptr
) {
390 struct find_part_struct
*find
= (find_part_struct
*)ptr
;
391 const unsigned char *num
= (const unsigned char*)find
->searchfor
;
// a component that does not start with a digit cannot match
395 if (!isdigit((int)*num
)) return true;
396 /* convert from decimal to int */
398 while (isdigit((int)*num
)) n
= (n
* 10) + (*num
++ - '0');
// components must be separated by '.'
400 if (*num
!= '.') return true;
// the parsed component has to equal the id at this level
403 if (n
!= (unsigned int)id
->id
) return true;
// the whole search string was consumed: this part is the match
406 if (*num
== 0) find
->foundpart
= this;
// Looks up a part by its section name.
//   name - section name, stored in find.searchfor for the findPart() callback
// Returns a Resource built from find.foundpart, which is initialised to NULL
// here and only assigned by findPart() when a part matches.
410 Resource
MimePart::findByName(const char *name
) {
411 struct find_part_struct find
;
412 find
.searchfor
= name
;
413 find
.foundpart
= NULL
;
414 enumerateParts(&MimePart::findPart
, &find
);
415 return Resource
{find
.foundpart
};
// C-style trampoline handed to mbfl_convert_filter_new(): `dat` is cast back
// to the MimePart it belongs to and the character `c` is forwarded to
// MimePart::filter(); that call's result is returned unchanged.
418 static int filter_into_work_buffer(int c
, void *dat
) {
419 MimePart
*part
= (MimePart
*)dat
;
420 return part
->filter(c
);
// Receives one character `c` and appends it to m_parsedata.workbuf; once the
// buffer holds 4096 bytes or more it is handed to m_extract_func and cleared.
// NOTE(review): the character is appended as a two-element char array
// {c, '\0'}; if workbuf's operator+= treats that as a NUL-terminated C
// string, a value of c == 0 would append nothing - confirm.
// NOTE(review): the return statement is not visible in this listing.
423 int MimePart::filter(int c
) {
424 char buf
[] = {(char)c
, '\0'};
425 m_parsedata
.workbuf
+= buf
;
// flush in chunks so the work buffer does not grow without bound
426 if (m_parsedata
.workbuf
.size() >= 4096) {
427 (this->*m_extract_func
)(m_parsedata
.workbuf
);
428 m_parsedata
.workbuf
.clear();
433 void MimePart::decoderPrepare(bool do_decode
) {
434 enum mbfl_no_encoding from
= mbfl_no_encoding_8bit
;
436 if (do_decode
&& !m_content_transfer_encoding
.empty()) {
437 from
= mbfl_name2no_encoding(m_content_transfer_encoding
.data());
438 if (from
== mbfl_no_encoding_invalid
) {
439 if (strcasecmp("binary", m_content_transfer_encoding
.data()) != 0) {
440 raise_warning("mbstring doesn't know how to decode %s "
441 "transfer encoding!",
442 m_content_transfer_encoding
.data());
444 from
= mbfl_no_encoding_8bit
;
448 m_parsedata
.workbuf
.clear();
451 if (from
== mbfl_no_encoding_8bit
|| from
== mbfl_no_encoding_7bit
) {
452 m_extract_filter
= NULL
;
454 m_extract_filter
= mbfl_convert_filter_new(from
, mbfl_no_encoding_8bit
,
455 filter_into_work_buffer
,
// Ends a decode run: if a conversion filter is installed it is flushed and
// then deleted; afterwards any data still sitting in m_parsedata.workbuf is
// passed to m_extract_func (when one is set) and the buffer is cleared.
461 void MimePart::decoderFinish() {
462 if (m_extract_filter
) {
463 mbfl_convert_filter_flush(m_extract_filter
);
464 mbfl_convert_filter_delete(m_extract_filter
);
// emit whatever is left in the work buffer
466 if (m_extract_func
&& !m_parsedata
.workbuf
.empty()) {
467 (this->*m_extract_func
)(m_parsedata
.workbuf
);
468 m_parsedata
.workbuf
.clear();
// Feeds a chunk of raw input into the decoder.
// With a conversion filter installed, each byte of `str` is pushed through
// mbfl_convert_filter_feed(); a negative result raises a warning.
// NOTE(review): the lines following the warning and the branch that leads to
// the direct m_extract_func(str) call are not visible in this listing;
// presumably that call is the no-filter path.
472 void MimePart::decoderFeed(const String
& str
) {
474 if (m_extract_filter
) {
475 for (int i
= 0; i
< str
.size(); i
++) {
476 if (mbfl_convert_filter_feed(str
[i
], m_extract_filter
) < 0) {
477 raise_warning("filter conversion failed. Input message is "
478 "probably incorrectly encoded");
483 (this->*m_extract_func
)(str
);
// MIME version string that isVersion1() compares against.
488 const StaticString
s_1_pt_0("1.0");
// True when this part's m_mime_version equals "1.0" or when the part has a
// parent (m_parent is set).
490 bool MimePart::isVersion1() {
491 return m_mime_version
== s_1_pt_0
|| m_parent
;
495 s_headers("headers"),
496 s_starting_pos("starting-pos"),
497 s_starting_pos_body("starting-pos-body"),
498 s_ending_pos("ending-pos"),
499 s_ending_pos_body("ending-pos-body"),
500 s_line_count("line-count"),
501 s_body_line_count("body-line-count"),
502 s_charset("charset"),
503 s_transfer_encoding("transfer-encoding"),
504 s_content_type("content-type"),
505 s_content_("content-"),
506 s_text_plain_error("text/plain, (error)"),
507 s_content_disposition("content-disposition"),
508 s_disposition_("disposition-"),
509 s_content_location("content-location"),
510 s_content_base("content-base"),
511 s_content_boundary("content-boundary"),
512 s_content_id("content-id"),
513 s_content_description("content-description"),
514 s_content_language("content-language"),
515 s_content_md5("content-md5"),
516 s_boundary("boundary"),
519 s_mime_version("mime-version"),
520 s_content_transfer_encoding("content-transfer-encoding");
522 Variant
MimePart::getPartData() {
523 Array ret
= Array::Create();
525 ret
.set(s_headers
, m_headers
);
527 ret
.set(s_starting_pos
, m_startpos
);
528 ret
.set(s_starting_pos_body
, m_bodystart
);
530 ret
.set(s_ending_pos
, m_endpos
);
531 ret
.set(s_ending_pos_body
, m_bodyend
);
532 ret
.set(s_line_count
, m_nlines
);
533 ret
.set(s_body_line_count
, m_nbodylines
);
535 ret
.set(s_ending_pos
, m_bodyend
);
536 ret
.set(s_ending_pos_body
, m_bodyend
);
537 ret
.set(s_line_count
, m_nlines
? m_nlines
- 1 : m_nlines
);
538 ret
.set(s_body_line_count
, m_nbodylines
? m_nbodylines
- 1 : m_nbodylines
);
541 if (!m_charset
.empty()) {
542 ret
.set(s_charset
, m_charset
);
544 ret
.set(s_charset
, "us-ascii");
546 if (!m_content_transfer_encoding
.empty()) {
547 ret
.set(s_transfer_encoding
, m_content_transfer_encoding
);
549 ret
.set(s_transfer_encoding
, "8bit");
551 if (!m_content_type
.empty()) {
552 m_content_type
.getAll(ret
, s_content_type
, s_content_
);
554 ret
.set(s_content_type
, s_text_plain_error
);
557 if (!m_content_disposition
.empty()) {
558 m_content_disposition
.getAll(ret
, s_content_disposition
, s_disposition_
);
561 if (!m_content_location
.empty()) {
562 ret
.set(s_content_location
, m_content_location
);
564 if (!m_content_base
.empty()) {
565 ret
.set(s_content_base
, m_content_base
);
567 ret
.set(s_content_base
, "/");
570 if (!m_boundary
.empty()) {
571 ret
.set(s_content_boundary
, m_boundary
);
574 /* extract the address part of the content-id only */
575 Variant contentId
= m_headers
[s_content_id
];
576 if (!contentId
.isNull()) {
577 php_rfc822_tokenized_t
*toks
=
578 php_mailparse_rfc822_tokenize((const char*)contentId
.toString().data(),
580 php_rfc822_addresses_t
*addrs
=
581 php_rfc822_parse_address_tokens(toks
);
582 if (addrs
->naddrs
> 0) {
583 ret
.set(s_content_id
, String(addrs
->addrs
[0].address
, CopyString
));
585 php_rfc822_free_addresses(addrs
);
586 php_rfc822_tokenize_free(toks
);
589 auto copyHeader
= [&](const Variant
& key
) {
590 if (m_headers
.exists(key
)) ret
.set(key
, m_headers
[key
]);
592 copyHeader(s_content_description
);
593 copyHeader(s_content_language
);
594 copyHeader(s_content_md5
);
598 bool MimePart::parse(const char *buf
, int bufsize
) {
599 while (bufsize
> 0) {
602 for (; len
< bufsize
; len
++) {
603 if (buf
[len
] == '\n') break;
605 if (len
< bufsize
&& buf
[len
] == '\n') {
607 m_parsedata
.workbuf
+= String(buf
, len
, CopyString
);
608 ProcessLine(req::ptr
<MimePart
>(this), m_parsedata
.workbuf
);
609 m_parsedata
.workbuf
.clear();
611 m_parsedata
.workbuf
+= String(buf
, len
, CopyString
);
// Creates a new child part of this part.
//   startpos - initial value for the child's start, end, body-start and
//              body-end positions
//   inherit  - NOTE(review): presumably guards the copy of the transfer
//              encoding and charset below; the guarding lines are not visible
//              in this listing
// The child is remembered as m_parsedata.lastpart, gets `this` as its parent
// and is appended to m_children.
// NOTE(review): the return statement is not visible in this listing.
620 req::ptr
<MimePart
> MimePart::createChild(int startpos
, bool inherit
) {
621 auto child
= req::make
<MimePart
>();
622 m_parsedata
.lastpart
= child
;
623 child
->m_parent
= this;
625 m_children
.append(Resource(child
));
// all four positions start out at startpos
626 child
->m_startpos
= child
->m_endpos
= child
->m_bodystart
=
627 child
->m_bodyend
= startpos
;
// carry the parent's encoding and charset over to the child
630 child
->m_content_transfer_encoding
= m_content_transfer_encoding
;
631 child
->m_charset
= m_charset
;
636 bool MimePart::processHeader() {
637 if (m_parsedata
.headerbuf
.empty()) {
641 /* parse the header line */
642 php_rfc822_tokenized_t
*toks
=
643 php_mailparse_rfc822_tokenize(m_parsedata
.headerbuf
.data(), 0);
645 /* valid headers consist of at least two tokens,
646 with the first being a string and the second token being a ':' */
647 if (toks
->ntokens
< 2 || toks
->tokens
[0].token
!= 0 ||
648 toks
->tokens
[1].token
!= ':') {
649 m_parsedata
.headerbuf
.clear();
650 php_rfc822_tokenize_free(toks
);
654 /* get a lower-case version of the first token */
655 String
header_key(php_rfc822_recombine_tokens
657 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS
|
658 PHP_RFC822_RECOMBINE_STRTOLOWER
), AttachString
);
660 const char *header_val
= strchr(m_parsedata
.headerbuf
.data(), ':');
661 String
header_val_stripped(php_rfc822_recombine_tokens
662 (toks
, 2, toks
->ntokens
-2,
663 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS
|
664 PHP_RFC822_RECOMBINE_STRTOLOWER
), AttachString
);
668 while (isspace(*header_val
)) {
672 /* add the header to the hash.
673 * join multiple To: or Cc: lines together */
674 if ((header_key
== s_to
|| header_key
== s_cc
) &&
675 m_headers
.exists(header_key
)) {
676 String newstr
= m_headers
[header_key
].toString();
678 newstr
+= header_val
;
679 m_headers
.set(header_key
, newstr
);
681 if (m_headers
.exists(header_key
)) {
682 auto const zheaderval
= m_headers
.lvalAt(header_key
).unboxed();
683 if (isArrayLikeType(zheaderval
.type())) {
684 asArrRef(zheaderval
).append(String(header_val
, CopyString
));
686 // Create a nested array if there is more than one of the same header
687 Array zarr
= Array::Create();
688 zarr
.append(zheaderval
.tv());
689 zarr
.append(String(header_val
, CopyString
));
690 m_headers
.set(header_key
, zarr
);
693 m_headers
.set(header_key
, String(header_val
, CopyString
));
697 /* if it is useful, keep a pointer to it in the mime part */
698 if (header_key
== s_mime_version
) {
699 m_mime_version
= header_val_stripped
;
700 } else if (header_key
== s_content_location
) {
702 String(php_rfc822_recombine_tokens
703 (toks
, 2, toks
->ntokens
-2,
704 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS
), AttachString
);
705 } else if (header_key
== s_content_base
) {
707 String(php_rfc822_recombine_tokens
708 (toks
, 2, toks
->ntokens
-2,
709 PHP_RFC822_RECOMBINE_IGNORE_COMMENTS
), AttachString
);
710 } else if (header_key
== s_content_transfer_encoding
) {
711 m_content_transfer_encoding
= header_val_stripped
;
712 } else if (header_key
== s_content_type
) {
713 m_content_type
= MimeHeader(toks
);
714 Variant boundary
= m_content_type
.get(s_boundary
);
715 if (!boundary
.isNull()) {
716 m_boundary
= boundary
.toString();
718 Variant charset
= m_content_type
.get(s_charset
);
719 if (!charset
.isNull()) {
720 m_charset
= charset
.toString();
722 } else if (header_key
== s_content_disposition
) {
723 m_content_disposition
= MimeHeader(toks
);
727 php_rfc822_tokenize_free(toks
);
728 m_parsedata
.headerbuf
.clear();
732 bool MimePart::ProcessLine(req::ptr
<MimePart
> workpart
, const String
& line
) {
734 if (workpart
->m_children
.size() > MAXPARTS
) {
735 raise_warning("MIME message too complex");
739 const char *c
= line
.data();
741 /* strip trailing \r\n -- we always have a trailing \n */
742 int origcount
= line
.size();
743 int linelen
= origcount
- 1;
744 if (linelen
&& c
[linelen
-1] == '\r') {
748 /* Discover which part we were last working on */
749 while (workpart
->m_parsedata
.lastpart
) {
750 auto lastpart
= workpart
->m_parsedata
.lastpart
;
752 if (lastpart
->m_parsedata
.completed
) {
753 UpdatePositions(workpart
, workpart
->m_endpos
+ origcount
,
754 workpart
->m_endpos
+ origcount
, 1);
757 if (workpart
->m_boundary
.empty() || workpart
->m_parsedata
.in_header
) {
761 int bound_len
= workpart
->m_boundary
.size();
763 /* Look for a boundary */
764 if (c
[0] == '-' && c
[1] == '-' && linelen
>= 2+bound_len
&&
765 strncasecmp(workpart
->m_boundary
.data(), c
+2, bound_len
) == 0) {
767 /* is it the final boundary ? */
768 if (linelen
>= 4 + bound_len
&& strncmp(c
+2+bound_len
, "--", 2) == 0) {
769 lastpart
->m_parsedata
.completed
= true;
770 UpdatePositions(workpart
, workpart
->m_endpos
+ origcount
,
771 workpart
->m_endpos
+ origcount
, 1);
776 workpart
->createChild(workpart
->m_endpos
+ origcount
, true);
777 UpdatePositions(workpart
, workpart
->m_endpos
+ origcount
,
778 workpart
->m_endpos
+ linelen
, 1);
779 newpart
->m_mime_version
= workpart
->m_mime_version
;
780 newpart
->m_parsedata
.in_header
= true;
786 if (!workpart
->m_parsedata
.in_header
) {
787 if (!workpart
->m_parsedata
.completed
&& !workpart
->m_parsedata
.lastpart
) {
788 /* update the body/part end positions.
789 * For multipart messages, the final newline belongs to the boundary.
790 * Otherwise it belongs to the body
792 if (workpart
->m_parent
&&
793 strncasecmp(workpart
->getParent()->m_content_type
.m_value
.data(),
794 "multipart/", 10) == 0) {
795 UpdatePositions(workpart
, workpart
->m_endpos
+ origcount
,
796 workpart
->m_endpos
+ linelen
, true);
798 UpdatePositions(workpart
, workpart
->m_endpos
+ origcount
,
799 workpart
->m_endpos
+ origcount
, true);
805 UpdatePositions(workpart
, workpart
->m_endpos
+ origcount
,
806 workpart
->m_endpos
+ linelen
, true);
808 if (*c
== ' ' || *c
== '\t') {
809 /* This doesn't technically conform to rfc2822, as we're replacing
810 \t with \s, but this seems to fix cases where clients incorrectly
811 fold by inserting a \t character.
813 workpart
->m_parsedata
.headerbuf
+= " ";
816 workpart
->processHeader();
818 /* save header for possible continuation */
819 workpart
->m_parsedata
.headerbuf
+= String(c
, linelen
, CopyString
);
823 workpart
->processHeader();
826 workpart
->m_parsedata
.in_header
= false;
827 workpart
->m_bodystart
= workpart
->m_endpos
+ origcount
;
828 UpdatePositions(workpart
, workpart
->m_bodystart
, workpart
->m_bodystart
,
830 --workpart
->m_nbodylines
;
832 /* some broken mailers include the content-type header but not a
833 * mime-version header.
834 * Let's relax and pretend they said they were mime 1.0 compatible */
835 if (workpart
->m_mime_version
.empty() &&
836 !workpart
->m_content_type
.empty()) {
837 workpart
->m_mime_version
= "1.0";
840 if (!workpart
->isVersion1()) {
841 /* if we don't understand the MIME version, discard the content-type
843 workpart
->m_content_disposition
.clear();
844 workpart
->m_boundary
.clear();
845 workpart
->m_content_type
.clear();
846 workpart
->m_content_type
= MimeHeader("text/plain");
848 /* if there is no content type, default to text/plain, but use
849 message/rfc822 when in a multipart/digest message */
850 if (workpart
->isVersion1() && workpart
->m_content_type
.empty()) {
851 char *def_type
= "text/plain";
852 if (workpart
->m_parent
&&
853 strcasecmp(workpart
->getParent()->m_content_type
.m_value
.data(),
854 "multipart/digest") == 0) {
855 def_type
= "message/rfc822";
857 workpart
->m_content_type
= MimeHeader(def_type
);
860 /* if no charset had previously been set, either through inheritance
861 * or by an explicit content-type header, default to us-ascii */
862 if (workpart
->m_charset
.isNull()) {
863 workpart
->m_charset
= "us-ascii";
866 if (strcasecmp(workpart
->m_content_type
.m_value
.data(),
867 "message/rfc822") == 0) {
868 workpart
= workpart
->createChild(workpart
->m_bodystart
, false);
869 workpart
->m_parsedata
.in_header
= true;
874 /* create a section for the preamble that precedes the first boundary */
875 if (!workpart
->m_boundary
.empty()) {
876 workpart
= workpart
->createChild(workpart
->m_bodystart
, true);
877 workpart
->m_parsedata
.in_header
= false;
878 workpart
->m_parsedata
.is_dummy
= true;
// Records new end positions and line counts on `part`.
//   newendpos   - stored in m_endpos
//   newbodyend  - stored in m_bodyend
//   deltanlines - added to m_nlines, and to m_nbodylines when the part is
//                 not currently parsing its headers
// The last visible statement moves `part` to its parent.
// NOTE(review): the loop construct around these statements is not visible in
// this listing; presumably the update is repeated for every ancestor.
890 void MimePart::UpdatePositions(req::ptr
<MimePart
> part
, int newendpos
,
891 int newbodyend
, int deltanlines
) {
893 part
->m_endpos
= newendpos
;
894 part
->m_bodyend
= newbodyend
;
895 part
->m_nlines
+= deltanlines
;
// body lines are only counted once the headers are finished
896 if (!part
->m_parsedata
.in_header
) {
897 part
->m_nbodylines
+= deltanlines
;
899 part
= part
->m_parent
;
903 Variant
MimePart::extract(const Variant
& filename
, const Variant
& callbackfunc
, int decode
,
905 /* filename can be a filename or a stream */
907 if (filename
.isResource()) {
908 file
= cast
<File
>(filename
);
910 file
= File::Open(filename
.toString(), "rb");
912 /* filename is the actual data */
913 String data
= filename
.toString();
914 file
= req::make
<MemFile
>(data
.data(), data
.size());
921 m_extract_context
= callbackfunc
;
922 if (callbackfunc
.isString() && callbackfunc
.toString().empty()) {
923 m_extract_func
= &MimePart::outputToStdout
;
925 if (callbackfunc
.isNull()) {
926 m_extract_func
= &MimePart::outputToString
;
927 } else if (callbackfunc
.isResource()) {
928 m_extract_func
= &MimePart::outputToFile
;
930 m_extract_func
= &MimePart::callUserFunc
;
934 if (extractImpl(decode
, file
)) {
935 if (callbackfunc
.isNull()) {
936 return m_extract_context
;
938 if (callbackfunc
.isResource()) {
939 return HHVM_FN(stream_get_contents
)(callbackfunc
.toResource());
946 int MimePart::extractImpl(int decode
, req::ptr
<File
> src
) {
947 /* figure out where the message part starts/ends */
948 int start_pos
= (decode
& DecodeNoHeaders
) ? m_bodystart
: m_startpos
;
949 int end
= (decode
& DecodeNoBody
) ?
950 m_bodystart
: (m_parent
? m_bodyend
: m_endpos
);
952 decoderPrepare(decode
& Decode8Bit
);
954 if (!src
->seek(start_pos
)) {
955 raise_warning("unable to seek to section start");
960 while (start_pos
< end
) {
962 if (n
> end
- start_pos
) n
= end
- start_pos
;
963 String str
= src
->read(n
);
965 raise_warning("error reading from file at offset %d", start_pos
);
970 start_pos
+= str
.size();
// Extraction sink: invokes the callable held in m_extract_context through
// vm_call_user_func(), passing the chunk `s` as its only argument.
976 void MimePart::callUserFunc(const String
& s
) {
977 vm_call_user_func(m_extract_context
, make_packed_array(s
));
980 void MimePart::outputToStdout(const String
& s
) {
// Extraction sink: casts m_extract_context to a File and writes the chunk
// `s` to it.
984 void MimePart::outputToFile(const String
& s
) {
985 cast
<File
>(m_extract_context
)->write(s
);
// Extraction sink: appends the chunk `s` to the string form of
// m_extract_context and stores the result back into m_extract_context.
988 void MimePart::outputToString(const String
& s
) {
989 m_extract_context
= m_extract_context
.toString() + s
;
992 ///////////////////////////////////////////////////////////////////////////////