2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 2.00 of the Zend license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.zend.com/license/2_00.txt. |
12 | If you did not receive a copy of the Zend license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@zend.com so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/base/file.h"
19 #include "hphp/runtime/base/complex_types.h"
20 #include "hphp/runtime/base/string_buffer.h"
21 #include "hphp/runtime/base/type_conversions.h"
22 #include "hphp/runtime/base/builtin_functions.h"
23 #include "hphp/runtime/server/static_content_cache.h"
24 #include "hphp/runtime/server/virtual_host.h"
25 #include "hphp/runtime/base/runtime_option.h"
26 #include "hphp/runtime/base/runtime_error.h"
27 #include "hphp/runtime/base/array_init.h"
28 #include "hphp/util/logger.h"
29 #include "hphp/util/process.h"
30 #include "hphp/util/util.h"
31 #include "hphp/runtime/base/zend-string.h"
32 #include "hphp/runtime/base/zend-printf.h"
33 #include "hphp/runtime/base/exceptions.h"
35 #include "hphp/runtime/base/array_iterator.h"
36 #include "hphp/runtime/base/stream_wrapper_registry.h"
39 ///////////////////////////////////////////////////////////////////////////////
42 StaticString
File::s_class_name("File");
43 StaticString
File::s_resource_name("stream");
45 IMPLEMENT_REQUEST_LOCAL(FileData
, s_file_data
);
47 const int File::USE_INCLUDE_PATH
= 1;
49 String
File::TranslatePathKeepRelative(CStrRef filename
) {
50 String
canonicalized(Util::canonicalize(filename
.data(),
51 filename
.size()), AttachString
);
53 if (RuntimeOption::SafeFileAccess
) {
54 const vector
<string
> &allowedDirectories
=
55 VirtualHost::GetAllowedDirectories();
56 auto it
= std::upper_bound(allowedDirectories
.begin(),
57 allowedDirectories
.end(), canonicalized
,
58 [](CStrRef val
, const string
& dir
) {
59 return strcmp(val
.c_str(), dir
.c_str()) < 0;
61 if (it
!= allowedDirectories
.begin()) {
62 const string
& dir
= *--it
;
63 if (dir
.size() <= canonicalized
.size() &&
64 !strncmp(dir
.c_str(), canonicalized
.c_str(), dir
.size())) {
69 // disallow access with an absolute path
70 if (canonicalized
.charAt(0) == '/') {
74 // unresolvable paths are all considered as unsafe
75 if (canonicalized
.find("..") >= 0) {
76 assert(canonicalized
.find("..") == 0);
84 String
File::TranslatePath(CStrRef filename
) {
85 String canonicalized
= TranslatePathKeepRelative(filename
);
87 if (canonicalized
.charAt(0) == '/') {
91 String cwd
= g_context
->getCwd();
92 if (!cwd
.empty() && cwd
[cwd
.length() - 1] == '/') {
93 return cwd
+ canonicalized
;
95 return cwd
+ "/" + canonicalized
;
98 String
File::TranslatePathWithFileCache(CStrRef filename
) {
99 String
canonicalized(Util::canonicalize(filename
.data(),
100 filename
.size()), AttachString
);
101 String translated
= TranslatePath(canonicalized
);
102 if (!translated
.empty() && access(translated
.data(), F_OK
) < 0 &&
103 StaticContentCache::TheFileCache
) {
104 if (StaticContentCache::TheFileCache
->exists(canonicalized
.data(),
106 // we use file cache's file name to make stat() work
107 translated
= String(RuntimeOption::FileCache
);
113 String
File::TranslateCommand(CStrRef cmd
) {
114 //TODO: security checking
118 bool File::IsVirtualDirectory(CStrRef filename
) {
119 if (StaticContentCache::TheFileCache
&&
120 StaticContentCache::TheFileCache
->dirExists(filename
.data(), false)) {
126 bool File::IsPlainFilePath(CStrRef filename
) {
127 return filename
.find("://") == String::npos
;
130 Variant
File::Open(CStrRef filename
, CStrRef mode
,
131 int options
/* = 0 */,
132 CVarRef context
/* = null */) {
133 Stream::Wrapper
*wrapper
= Stream::getWrapperFromURI(filename
);
134 File
*file
= wrapper
->open(filename
, mode
, options
, context
);
135 if (file
!= nullptr) {
136 file
->m_name
= filename
.data();
137 file
->m_mode
= mode
.data();
138 return Resource(file
);
143 ///////////////////////////////////////////////////////////////////////////////
144 // constructor and destructor
146 File::File(bool nonblocking
)
147 : m_isLocal(false), m_fd(-1), m_closed(false), m_nonblocking(nonblocking
),
148 m_writepos(0), m_readpos(0), m_position(0), m_buffer(nullptr) {
155 void File::closeImpl() {
162 ///////////////////////////////////////////////////////////////////////////////
163 // default implementation of virtual functions
166 if (m_writepos
> m_readpos
) {
168 return m_buffer
[m_readpos
++] & 0xff;
172 int64_t len
= readImpl(buffer
, 1);
177 return (int)(unsigned char)buffer
[0];
180 String
File::read(int64_t length
) {
182 raise_notice("Invalid length %" PRId64
, length
);
186 String s
= String(length
, ReserveString
);
187 char *ret
= s
.mutableSlice().ptr
;
189 int64_t avail
= m_writepos
- m_readpos
;
191 while (avail
< length
&& !eof()) {
192 if (m_buffer
== nullptr) {
193 m_buffer
= (char *)malloc(CHUNK_SIZE
);
197 memcpy(ret
+ copied
, m_buffer
+ m_readpos
, avail
);
202 m_writepos
= readImpl(m_buffer
, CHUNK_SIZE
);
204 avail
= m_writepos
- m_readpos
;
206 if (avail
== 0 || m_nonblocking
) {
207 // For nonblocking mode, temporary out of data.
212 avail
= m_writepos
- m_readpos
;
214 int64_t n
= length
< avail
? length
: avail
;
215 memcpy(ret
+ copied
, m_buffer
+ m_readpos
, n
);
220 m_position
+= copied
;
221 return s
.setSize(copied
);
224 int64_t File::write(CStrRef data
, int64_t length
/* = 0 */) {
226 int64_t offset
= m_readpos
- m_writepos
;
227 m_readpos
= m_writepos
= 0; // invalidating read buffer
228 seek(offset
, SEEK_CUR
);
230 if (length
<= 0 || length
> data
.size()) {
231 length
= data
.size();
234 int64_t written
= writeImpl(data
.data(), length
);
235 m_position
+= written
;
241 int File::putc(char c
) {
244 int ret
= writeImpl(buf
, 1);
249 bool File::seek(int64_t offset
, int whence
/* = SEEK_SET */) {
250 if (whence
!= SEEK_CUR
) {
251 throw NotSupportedException(__func__
, "cannot seek other than SEEK_CUR");
254 throw NotSupportedException(__func__
, "cannot seek backwards");
257 int64_t avail
= m_writepos
- m_readpos
;
259 if (avail
>= offset
) {
270 int64_t nread
= offset
> (int64_t)sizeof(tmp
) ? (int64_t)sizeof(tmp
) : offset
;
271 nread
= readImpl(tmp
, nread
);
281 int64_t File::tell() {
282 throw NotSupportedException(__func__
, "cannot tell");
286 throw NotSupportedException(__func__
, "cannot test eof");
289 bool File::rewind() {
290 throw NotSupportedException(__func__
, "cannot rewind");
297 bool File::truncate(int64_t size
) {
298 throw NotSupportedException(__func__
, "cannot truncate");
301 bool File::lock(int operation
) {
303 return lock(operation
, b
);
306 bool File::lock(int operation
, bool &wouldblock
/* = false */) {
310 if (flock(m_fd
, operation
)) {
311 if (errno
== EWOULDBLOCK
) {
320 s_wrapper_type("wrapper_type"),
321 s_stream_type("stream_type"),
323 s_unread_bytes("unread_bytes"),
324 s_seekable("seekable"),
326 s_timed_out("timed_out"),
327 s_blocked("blocked"),
329 s_wrapper_data("wrapper_data");
331 Array
File::getMetaData() {
333 ret
.set(s_wrapper_type
, o_getClassName());
334 ret
.set(s_stream_type
, getStreamType());
335 ret
.set(s_mode
, String(m_mode
));
336 ret
.set(s_unread_bytes
, 0);
337 ret
.set(s_seekable
, seekable());
338 ret
.set(s_uri
, String(m_name
));
339 ret
.set(s_timed_out
, false);
340 ret
.set(s_blocked
, true);
341 ret
.set(s_eof
, eof());
342 ret
.set(s_wrapper_data
, getWrapperMetaData());
346 ///////////////////////////////////////////////////////////////////////////////
349 String
File::readLine(int64_t maxlen
/* = 0 */) {
350 size_t current_buf_size
= 0;
351 size_t total_copied
= 0;
354 int64_t avail
= m_writepos
- m_readpos
;
359 char *readptr
= m_buffer
+ m_readpos
;
363 cr
= (const char *)memchr(readptr
, '\r', avail
);
364 lf
= (const char *)memchr(readptr
, '\n', avail
);
365 if (cr
&& lf
!= cr
+ 1 && !(lf
&& lf
< cr
)) {
368 } else if ((cr
&& lf
&& cr
== lf
- 1) || (lf
)) {
369 /* dos or unix endings */
376 cpysz
= eol
- readptr
+ 1;
381 if (maxlen
> 0 && maxlen
<= cpysz
) {
386 current_buf_size
+= cpysz
+ 1;
388 ret
= (char *)realloc(ret
, current_buf_size
);
390 ret
= (char *)malloc(current_buf_size
);
392 memcpy(ret
+ total_copied
, readptr
, cpysz
);
397 total_copied
+= cpysz
;
405 if (m_buffer
== nullptr) {
406 m_buffer
= (char *)malloc(CHUNK_SIZE
);
408 m_writepos
= readImpl(m_buffer
, CHUNK_SIZE
);
410 if (m_writepos
- m_readpos
== 0) {
416 if (total_copied
== 0) {
417 assert(ret
== nullptr);
421 ret
[total_copied
] = '\0';
422 return String(ret
, total_copied
, AttachString
);
425 String
File::readRecord(CStrRef delimiter
, int64_t maxlen
/* = 0 */) {
426 if (eof() && m_writepos
== m_readpos
) {
430 if (maxlen
<= 0 || maxlen
> CHUNK_SIZE
) {
434 int64_t avail
= m_writepos
- m_readpos
;
435 if (m_buffer
== nullptr) {
436 m_buffer
= (char *)malloc(CHUNK_SIZE
* 3);
438 if (avail
< maxlen
&& !eof()) {
439 assert(m_writepos
+ maxlen
- avail
<= CHUNK_SIZE
* 3);
440 m_writepos
+= readImpl(m_buffer
+ m_writepos
, maxlen
- avail
);
441 maxlen
= m_writepos
- m_readpos
;
443 if (m_readpos
>= CHUNK_SIZE
) {
444 memcpy(m_buffer
, m_buffer
+ m_readpos
, m_writepos
- m_readpos
);
445 m_writepos
-= m_readpos
;
452 if (delimiter
.empty()) {
455 if (delimiter
.size() == 1) {
456 e
= (const char *)memchr(m_buffer
+ m_readpos
, delimiter
.charAt(0),
457 m_writepos
- m_readpos
);
459 int64_t pos
= string_find(m_buffer
+ m_readpos
, m_writepos
- m_readpos
,
460 delimiter
.data(), delimiter
.size(), 0, true);
462 e
= m_buffer
+ m_readpos
+ pos
;
471 toread
= e
- m_buffer
- m_readpos
;
476 if (toread
> maxlen
&& maxlen
> 0) {
481 String s
= String(toread
, ReserveString
);
482 char *buf
= s
.mutableSlice().ptr
;
484 memcpy(buf
, m_buffer
+ m_readpos
, toread
);
489 m_readpos
+= delimiter
.size();
490 m_position
+= delimiter
.size();
492 return s
.setSize(toread
);
498 int64_t File::print() {
502 int64_t len
= readImpl(buffer
, 1024);
505 g_context
->write(buffer
, len
);
510 int64_t File::printf(CStrRef format
, CArrRef args
) {
512 char *output
= string_printf(format
.data(), format
.size(), args
, &len
);
513 return write(String(output
, len
, AttachString
));
516 ///////////////////////////////////////////////////////////////////////////////
519 int64_t File::writeCSV(CArrRef fields
, char delimiter_char
/* = ',' */,
520 char enclosure_char
/* = '"' */) {
522 int count
= fields
.size();
523 const char escape_char
= '\\';
524 StringBuffer
csvline(1024);
526 for (ArrayIter
iter(fields
); iter
; ++iter
) {
527 String value
= iter
.second().toString();
528 bool need_enclosure
= false;
529 for (int i
= 0; i
< value
.size(); i
++) {
530 char ch
= value
.charAt(i
);
531 if (ch
== delimiter_char
|| ch
== enclosure_char
|| ch
== escape_char
||
532 ch
== '\n' || ch
== '\r' || ch
== '\t' || ch
== ' ') {
533 need_enclosure
= true;
537 if (need_enclosure
) {
538 csvline
.append(enclosure_char
);
539 const char *ch
= value
.data();
540 const char *end
= ch
+ value
.size();
541 bool escaped
= false;
543 if (*ch
== escape_char
) {
545 } else if (!escaped
&& *ch
== enclosure_char
) {
546 csvline
.append(enclosure_char
);
553 csvline
.append(enclosure_char
);
555 csvline
.append(value
);
558 if (++line
!= count
) {
559 csvline
.append(delimiter_char
);
562 csvline
.append('\n');
564 return write(csvline
.detach());
567 static const char *lookup_trailing_spaces(const char *ptr
, int len
) {
570 switch (*(ptr
- 1)) {
572 if (len
> 1 && *(ptr
- 2) == '\r') {
575 /* break is omitted intentionally */
583 Array
File::readCSV(int64_t length
/* = 0 */, char delimiter_char
/* = ',' */,
584 char enclosure_char
/* = '"' */,
585 char escape_char
/* = '\\' */) {
586 String line
= readLine(length
);
592 const char *buf
= line
.data();
593 int64_t buf_len
= line
.size();
595 char *temp
, *tptr
, *line_end
, *limit
;
598 int64_t temp_len
, line_end_len
;
599 bool first_field
= true;
601 /* Now into new section that parses buf for delimiter/enclosure fields */
603 /* Strip trailing space from buf, saving end of line in case required
604 for enclosure field */
606 tptr
= (char *)lookup_trailing_spaces(buf
, buf_len
);
607 line_end_len
= buf_len
- (size_t)(tptr
- buf
);
608 line_end
= limit
= tptr
;
610 /* reserve workspace for building each individual field */
612 temp
= (char *)malloc(temp_len
+ line_end_len
+ 1);
614 /* Initialize return array */
617 /* Main loop to read CSV fields */
618 /* NB this routine will return a single null entry for a blank line */
621 const char *hunk_begin
;
625 /* 1. Strip any leading space */
626 for (; bptr
< limit
; ++bptr
) {
627 if (!isspace((int)*(unsigned char *)bptr
) || *bptr
== delimiter_char
) {
632 if (first_field
&& bptr
== line_end
) {
633 ret
.append(null_variant
);
638 /* 2. Read field, leaving bptr pointing at start of next field */
639 if (bptr
< limit
&& *bptr
== enclosure_char
) {
642 bptr
++; /* move on to first character in field */
645 /* 2A. handle enclosure delimited field */
653 memcpy(tptr
, hunk_begin
, bptr
- hunk_begin
- 1);
654 tptr
+= (bptr
- hunk_begin
- 1);
659 memcpy(tptr
, hunk_begin
, bptr
- hunk_begin
);
660 tptr
+= (bptr
- hunk_begin
);
662 /* break is omitted intentionally */
665 if (hunk_begin
!= line_end
) {
666 memcpy(tptr
, hunk_begin
, bptr
- hunk_begin
);
667 tptr
+= (bptr
- hunk_begin
);
670 /* add the embedded line end to the field */
671 memcpy(tptr
, line_end
, line_end_len
);
672 tptr
+= line_end_len
;
674 new_line
= readLine(length
);
675 const char *new_buf
= new_line
.data();
676 int64_t new_len
= new_line
.size();
678 /* we've got an unterminated enclosure,
679 * assign all the data from the start of
680 * the enclosure to end of data to the
682 if ((size_t)temp_len
> (size_t)(limit
- buf
)) {
688 char *new_temp
= (char*)realloc(temp
, temp_len
);
689 tptr
= new_temp
+ (size_t)(tptr
- temp
);
693 bptr
= buf
= new_buf
;
696 line_end
= limit
= (char *)lookup_trailing_spaces(buf
, buf_len
);
697 line_end_len
= buf_len
- (size_t)(limit
- buf
);
704 /* we need to determine if the enclosure is
705 * 'real' or is it escaped */
707 case 1: /* escaped */
711 case 2: /* embedded enclosure ? let's check it */
712 if (*bptr
!= enclosure_char
) {
714 memcpy(tptr
, hunk_begin
, bptr
- hunk_begin
- 1);
715 tptr
+= (bptr
- hunk_begin
- 1);
719 memcpy(tptr
, hunk_begin
, bptr
- hunk_begin
);
720 tptr
+= (bptr
- hunk_begin
);
726 if (*bptr
== escape_char
) {
728 } else if (*bptr
== enclosure_char
) {
736 inc_len
= (bptr
< limit
? 1 : 0);
740 /* look up for a delimiter */
741 for (; bptr
< limit
; ++bptr
) {
742 if (*bptr
== delimiter_char
) {
747 memcpy(tptr
, hunk_begin
, bptr
- hunk_begin
);
748 tptr
+= (bptr
- hunk_begin
);
749 if (bptr
< limit
) ++bptr
;
752 /* 2B. Handle non-enclosure field */
756 for (; bptr
< limit
; ++bptr
) {
757 if (*bptr
== delimiter_char
) {
761 memcpy(tptr
, hunk_begin
, bptr
- hunk_begin
);
762 tptr
+= (bptr
- hunk_begin
);
764 comp_end
= (char *)lookup_trailing_spaces(temp
, tptr
- temp
);
765 if (*bptr
== delimiter_char
) {
770 /* 3. Now pass our field back to php */
772 ret
.append(String(temp
, comp_end
- temp
, CopyString
));
773 } while (bptr
< limit
);
779 String
File::getLastError() {
780 return Util::safe_strerror(errno
);
784 ///////////////////////////////////////////////////////////////////////////////