Rename runtime/base/zend_* to zend-
[hiphop-php.git] / hphp / runtime / base / file.cpp
blob36a54bfcc049cebdc2f7f2458e3def06db14fe5c
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.00 of the Zend license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.zend.com/license/2_00.txt.                                |
   | If you did not receive a copy of the Zend license and are unable to  |
   | obtain it through the world-wide-web, please send a note to          |
   | license@zend.com so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/
18 #include "hphp/runtime/base/file.h"
19 #include "hphp/runtime/base/complex_types.h"
20 #include "hphp/runtime/base/string_buffer.h"
21 #include "hphp/runtime/base/type_conversions.h"
22 #include "hphp/runtime/base/builtin_functions.h"
23 #include "hphp/runtime/server/static_content_cache.h"
24 #include "hphp/runtime/server/virtual_host.h"
25 #include "hphp/runtime/base/runtime_option.h"
26 #include "hphp/runtime/base/runtime_error.h"
27 #include "hphp/runtime/base/array_init.h"
28 #include "hphp/util/logger.h"
29 #include "hphp/util/process.h"
30 #include "hphp/util/util.h"
31 #include "hphp/runtime/base/zend-string.h"
32 #include "hphp/runtime/base/zend-printf.h"
33 #include "hphp/runtime/base/exceptions.h"
34 #include <sys/file.h>
35 #include "hphp/runtime/base/array_iterator.h"
36 #include "hphp/runtime/base/stream_wrapper_registry.h"
38 namespace HPHP {
39 ///////////////////////////////////////////////////////////////////////////////
40 // statics
42 StaticString File::s_class_name("File");
43 StaticString File::s_resource_name("stream");
45 IMPLEMENT_REQUEST_LOCAL(FileData, s_file_data);
47 const int File::USE_INCLUDE_PATH = 1;
49 String File::TranslatePathKeepRelative(CStrRef filename) {
50 String canonicalized(Util::canonicalize(filename.data(),
51 filename.size()), AttachString);
53 if (RuntimeOption::SafeFileAccess) {
54 const vector<string> &allowedDirectories =
55 VirtualHost::GetAllowedDirectories();
56 auto it = std::upper_bound(allowedDirectories.begin(),
57 allowedDirectories.end(), canonicalized,
58 [](CStrRef val, const string& dir) {
59 return strcmp(val.c_str(), dir.c_str()) < 0;
60 });
61 if (it != allowedDirectories.begin()) {
62 const string& dir = *--it;
63 if (dir.size() <= canonicalized.size() &&
64 !strncmp(dir.c_str(), canonicalized.c_str(), dir.size())) {
65 return canonicalized;
69 // disallow access with an absolute path
70 if (canonicalized.charAt(0) == '/') {
71 return "";
74 // unresolvable paths are all considered as unsafe
75 if (canonicalized.find("..") >= 0) {
76 assert(canonicalized.find("..") == 0);
77 return "";
81 return canonicalized;
84 String File::TranslatePath(CStrRef filename) {
85 String canonicalized = TranslatePathKeepRelative(filename);
87 if (canonicalized.charAt(0) == '/') {
88 return canonicalized;
91 String cwd = g_context->getCwd();
92 if (!cwd.empty() && cwd[cwd.length() - 1] == '/') {
93 return cwd + canonicalized;
95 return cwd + "/" + canonicalized;
98 String File::TranslatePathWithFileCache(CStrRef filename) {
99 String canonicalized(Util::canonicalize(filename.data(),
100 filename.size()), AttachString);
101 String translated = TranslatePath(canonicalized);
102 if (!translated.empty() && access(translated.data(), F_OK) < 0 &&
103 StaticContentCache::TheFileCache) {
104 if (StaticContentCache::TheFileCache->exists(canonicalized.data(),
105 false)) {
106 // we use file cache's file name to make stat() work
107 translated = String(RuntimeOption::FileCache);
110 return translated;
113 String File::TranslateCommand(CStrRef cmd) {
114 //TODO: security checking
115 return cmd;
118 bool File::IsVirtualDirectory(CStrRef filename) {
119 if (StaticContentCache::TheFileCache &&
120 StaticContentCache::TheFileCache->dirExists(filename.data(), false)) {
121 return true;
123 return false;
126 bool File::IsPlainFilePath(CStrRef filename) {
127 return filename.find("://") == String::npos;
130 Variant File::Open(CStrRef filename, CStrRef mode,
131 int options /* = 0 */,
132 CVarRef context /* = null */) {
133 Stream::Wrapper *wrapper = Stream::getWrapperFromURI(filename);
134 File *file = wrapper->open(filename, mode, options, context);
135 if (file != nullptr) {
136 file->m_name = filename.data();
137 file->m_mode = mode.data();
138 return Resource(file);
140 return false;
143 ///////////////////////////////////////////////////////////////////////////////
144 // constructor and destructor
146 File::File(bool nonblocking)
147 : m_isLocal(false), m_fd(-1), m_closed(false), m_nonblocking(nonblocking),
148 m_writepos(0), m_readpos(0), m_position(0), m_buffer(nullptr) {
151 File::~File() {
152 closeImpl();
155 void File::closeImpl() {
156 if (m_buffer) {
157 free(m_buffer);
158 m_buffer = nullptr;
162 ///////////////////////////////////////////////////////////////////////////////
163 // default implementation of virtual functions
165 int File::getc() {
166 if (m_writepos > m_readpos) {
167 m_position++;
168 return m_buffer[m_readpos++] & 0xff;
171 char buffer[1];
172 int64_t len = readImpl(buffer, 1);
173 if (len != 1) {
174 return EOF;
176 m_position += len;
177 return (int)(unsigned char)buffer[0];
180 String File::read(int64_t length) {
181 if (length <= 0) {
182 raise_notice("Invalid length %" PRId64, length);
183 return "";
186 String s = String(length, ReserveString);
187 char *ret = s.mutableSlice().ptr;
188 int64_t copied = 0;
189 int64_t avail = m_writepos - m_readpos;
191 while (avail < length && !eof()) {
192 if (m_buffer == nullptr) {
193 m_buffer = (char *)malloc(CHUNK_SIZE);
196 if (avail > 0) {
197 memcpy(ret + copied, m_buffer + m_readpos, avail);
198 copied += avail;
199 length -= avail;
202 m_writepos = readImpl(m_buffer, CHUNK_SIZE);
203 m_readpos = 0;
204 avail = m_writepos - m_readpos;
206 if (avail == 0 || m_nonblocking) {
207 // For nonblocking mode, temporary out of data.
208 break;
212 avail = m_writepos - m_readpos;
213 if (avail > 0) {
214 int64_t n = length < avail ? length : avail;
215 memcpy(ret + copied, m_buffer + m_readpos, n);
216 m_readpos += n;
217 copied += n;
220 m_position += copied;
221 return s.setSize(copied);
224 int64_t File::write(CStrRef data, int64_t length /* = 0 */) {
225 if (seekable()) {
226 int64_t offset = m_readpos - m_writepos;
227 m_readpos = m_writepos = 0; // invalidating read buffer
228 seek(offset, SEEK_CUR);
230 if (length <= 0 || length > data.size()) {
231 length = data.size();
233 if (length) {
234 int64_t written = writeImpl(data.data(), length);
235 m_position += written;
236 return written;
238 return 0;
241 int File::putc(char c) {
242 char buf[1];
243 buf[0] = c;
244 int ret = writeImpl(buf, 1);
245 m_position += ret;
246 return ret;
249 bool File::seek(int64_t offset, int whence /* = SEEK_SET */) {
250 if (whence != SEEK_CUR) {
251 throw NotSupportedException(__func__, "cannot seek other than SEEK_CUR");
253 if (offset < 0) {
254 throw NotSupportedException(__func__, "cannot seek backwards");
256 if (offset > 0) {
257 int64_t avail = m_writepos - m_readpos;
258 assert(avail >= 0);
259 if (avail >= offset) {
260 m_readpos += offset;
261 return true;
263 if (avail > 0) {
264 m_readpos += avail;
265 offset -= avail;
268 while (offset) {
269 char tmp[1024];
270 int64_t nread = offset > (int64_t)sizeof(tmp) ? (int64_t)sizeof(tmp) : offset;
271 nread = readImpl(tmp, nread);
272 if (nread <= 0) {
273 return false;
275 offset -= nread;
278 return true;
281 int64_t File::tell() {
282 throw NotSupportedException(__func__, "cannot tell");
285 bool File::eof() {
286 throw NotSupportedException(__func__, "cannot test eof");
289 bool File::rewind() {
290 throw NotSupportedException(__func__, "cannot rewind");
293 bool File::flush() {
294 return true;
297 bool File::truncate(int64_t size) {
298 throw NotSupportedException(__func__, "cannot truncate");
301 bool File::lock(int operation) {
302 bool b = false;
303 return lock(operation, b);
306 bool File::lock(int operation, bool &wouldblock /* = false */) {
307 assert(m_fd >= 0);
309 wouldblock = false;
310 if (flock(m_fd, operation)) {
311 if (errno == EWOULDBLOCK) {
312 wouldblock = true;
314 return false;
316 return true;
319 const StaticString
320 s_wrapper_type("wrapper_type"),
321 s_stream_type("stream_type"),
322 s_mode("mode"),
323 s_unread_bytes("unread_bytes"),
324 s_seekable("seekable"),
325 s_uri("uri"),
326 s_timed_out("timed_out"),
327 s_blocked("blocked"),
328 s_eof("eof"),
329 s_wrapper_data("wrapper_data");
331 Array File::getMetaData() {
332 ArrayInit ret(10);
333 ret.set(s_wrapper_type, o_getClassName());
334 ret.set(s_stream_type, getStreamType());
335 ret.set(s_mode, String(m_mode));
336 ret.set(s_unread_bytes, 0);
337 ret.set(s_seekable, seekable());
338 ret.set(s_uri, String(m_name));
339 ret.set(s_timed_out, false);
340 ret.set(s_blocked, true);
341 ret.set(s_eof, eof());
342 ret.set(s_wrapper_data, getWrapperMetaData());
343 return ret.create();
346 ///////////////////////////////////////////////////////////////////////////////
347 // utility functions
349 String File::readLine(int64_t maxlen /* = 0 */) {
350 size_t current_buf_size = 0;
351 size_t total_copied = 0;
352 char *ret = nullptr;
353 for (;;) {
354 int64_t avail = m_writepos - m_readpos;
355 if (avail > 0) {
356 int64_t cpysz = 0;
357 bool done = false;
359 char *readptr = m_buffer + m_readpos;
360 const char *eol;
361 const char *cr;
362 const char *lf;
363 cr = (const char *)memchr(readptr, '\r', avail);
364 lf = (const char *)memchr(readptr, '\n', avail);
365 if (cr && lf != cr + 1 && !(lf && lf < cr)) {
366 /* mac */
367 eol = cr;
368 } else if ((cr && lf && cr == lf - 1) || (lf)) {
369 /* dos or unix endings */
370 eol = lf;
371 } else {
372 eol = cr;
375 if (eol) {
376 cpysz = eol - readptr + 1;
377 done = true;
378 } else {
379 cpysz = avail;
381 if (maxlen > 0 && maxlen <= cpysz) {
382 cpysz = maxlen;
383 done = true;
386 current_buf_size += cpysz + 1;
387 if (ret) {
388 ret = (char *)realloc(ret, current_buf_size);
389 } else {
390 ret = (char *)malloc(current_buf_size);
392 memcpy(ret + total_copied, readptr, cpysz);
394 m_position += cpysz;
395 m_readpos += cpysz;
396 maxlen -= cpysz;
397 total_copied += cpysz;
399 if (done) {
400 break;
402 } else if (eof()) {
403 break;
404 } else {
405 if (m_buffer == nullptr) {
406 m_buffer = (char *)malloc(CHUNK_SIZE);
408 m_writepos = readImpl(m_buffer, CHUNK_SIZE);
409 m_readpos = 0;
410 if (m_writepos - m_readpos == 0) {
411 break;
416 if (total_copied == 0) {
417 assert(ret == nullptr);
418 return String();
421 ret[total_copied] = '\0';
422 return String(ret, total_copied, AttachString);
425 String File::readRecord(CStrRef delimiter, int64_t maxlen /* = 0 */) {
426 if (eof() && m_writepos == m_readpos) {
427 return empty_string;
430 if (maxlen <= 0 || maxlen > CHUNK_SIZE) {
431 maxlen = CHUNK_SIZE;
434 int64_t avail = m_writepos - m_readpos;
435 if (m_buffer == nullptr) {
436 m_buffer = (char *)malloc(CHUNK_SIZE * 3);
438 if (avail < maxlen && !eof()) {
439 assert(m_writepos + maxlen - avail <= CHUNK_SIZE * 3);
440 m_writepos += readImpl(m_buffer + m_writepos, maxlen - avail);
441 maxlen = m_writepos - m_readpos;
443 if (m_readpos >= CHUNK_SIZE) {
444 memcpy(m_buffer, m_buffer + m_readpos, m_writepos - m_readpos);
445 m_writepos -= m_readpos;
446 m_readpos = 0;
449 int64_t toread;
450 const char *e;
451 bool skip = false;
452 if (delimiter.empty()) {
453 toread = maxlen;
454 } else {
455 if (delimiter.size() == 1) {
456 e = (const char *)memchr(m_buffer + m_readpos, delimiter.charAt(0),
457 m_writepos - m_readpos);
458 } else {
459 int64_t pos = string_find(m_buffer + m_readpos, m_writepos - m_readpos,
460 delimiter.data(), delimiter.size(), 0, true);
461 if (pos >= 0) {
462 e = m_buffer + m_readpos + pos;
463 } else {
464 e = nullptr;
468 if (!e) {
469 toread = maxlen;
470 } else {
471 toread = e - m_buffer - m_readpos;
472 skip = true;
476 if (toread > maxlen && maxlen > 0) {
477 toread = maxlen;
480 if (toread >= 0) {
481 String s = String(toread, ReserveString);
482 char *buf = s.mutableSlice().ptr;
483 if (toread) {
484 memcpy(buf, m_buffer + m_readpos, toread);
487 m_readpos += toread;
488 if (skip) {
489 m_readpos += delimiter.size();
490 m_position += delimiter.size();
492 return s.setSize(toread);
495 return empty_string;
498 int64_t File::print() {
499 int64_t total = 0;
500 while (true) {
501 char buffer[1024];
502 int64_t len = readImpl(buffer, 1024);
503 if (len == 0) break;
504 total += len;
505 g_context->write(buffer, len);
507 return total;
510 int64_t File::printf(CStrRef format, CArrRef args) {
511 int len = 0;
512 char *output = string_printf(format.data(), format.size(), args, &len);
513 return write(String(output, len, AttachString));
516 ///////////////////////////////////////////////////////////////////////////////
517 // csv functions
519 int64_t File::writeCSV(CArrRef fields, char delimiter_char /* = ',' */,
520 char enclosure_char /* = '"' */) {
521 int line = 0;
522 int count = fields.size();
523 const char escape_char = '\\';
524 StringBuffer csvline(1024);
526 for (ArrayIter iter(fields); iter; ++iter) {
527 String value = iter.second().toString();
528 bool need_enclosure = false;
529 for (int i = 0; i < value.size(); i++) {
530 char ch = value.charAt(i);
531 if (ch == delimiter_char || ch == enclosure_char || ch == escape_char ||
532 ch == '\n' || ch == '\r' || ch == '\t' || ch == ' ') {
533 need_enclosure = true;
534 break;
537 if (need_enclosure) {
538 csvline.append(enclosure_char);
539 const char *ch = value.data();
540 const char *end = ch + value.size();
541 bool escaped = false;
542 while (ch < end) {
543 if (*ch == escape_char) {
544 escaped = true;
545 } else if (!escaped && *ch == enclosure_char) {
546 csvline.append(enclosure_char);
547 } else {
548 escaped = false;
550 csvline.append(*ch);
551 ch++;
553 csvline.append(enclosure_char);
554 } else {
555 csvline.append(value);
558 if (++line != count) {
559 csvline.append(delimiter_char);
562 csvline.append('\n');
564 return write(csvline.detach());
/**
 * Locates the line terminator at the end of a buffer.
 *
 * @param ptr start of the buffer
 * @param len number of bytes in the buffer (64-bit, matching the lengths
 *            the callers in this file pass)
 * @return a pointer to the trailing "\r\n", '\n' or '\r' if the buffer ends
 *         with one, otherwise a pointer one past the last byte; `ptr`
 *         itself when `len` is not positive.
 */
static const char *lookup_trailing_spaces(const char *ptr, int64_t len) {
  if (len <= 0) {
    return ptr;
  }

  const char *end = ptr + len;
  const char last = end[-1];
  if (last == '\n') {
    // "\r\n" counts as one two-byte terminator.
    if (len > 1 && end[-2] == '\r') {
      return end - 2;
    }
    return end - 1;
  }
  if (last == '\r') {
    return end - 1;
  }
  return end;
}
583 Array File::readCSV(int64_t length /* = 0 */, char delimiter_char /* = ',' */,
584 char enclosure_char /* = '"' */,
585 char escape_char /* = '\\' */) {
586 String line = readLine(length);
587 if (line.empty()) {
588 return Array();
591 String new_line;
592 const char *buf = line.data();
593 int64_t buf_len = line.size();
595 char *temp, *tptr, *line_end, *limit;
596 const char *bptr;
598 int64_t temp_len, line_end_len;
599 bool first_field = true;
601 /* Now into new section that parses buf for delimiter/enclosure fields */
603 /* Strip trailing space from buf, saving end of line in case required
604 for enclosure field */
605 bptr = buf;
606 tptr = (char *)lookup_trailing_spaces(buf, buf_len);
607 line_end_len = buf_len - (size_t)(tptr - buf);
608 line_end = limit = tptr;
610 /* reserve workspace for building each individual field */
611 temp_len = buf_len;
612 temp = (char *)malloc(temp_len + line_end_len + 1);
614 /* Initialize return array */
615 Array ret;
617 /* Main loop to read CSV fields */
618 /* NB this routine will return a single null entry for a blank line */
619 do {
620 char *comp_end;
621 const char *hunk_begin;
623 tptr = temp;
625 /* 1. Strip any leading space */
626 for (; bptr < limit; ++bptr) {
627 if (!isspace((int)*(unsigned char *)bptr) || *bptr == delimiter_char) {
628 break;
632 if (first_field && bptr == line_end) {
633 ret.append(null_variant);
634 break;
636 first_field = false;
638 /* 2. Read field, leaving bptr pointing at start of next field */
639 if (bptr < limit && *bptr == enclosure_char) {
640 int state = 0;
642 bptr++; /* move on to first character in field */
643 hunk_begin = bptr;
645 /* 2A. handle enclosure delimited field */
647 int inc_len = 1;
648 for (;;) {
649 switch (inc_len) {
650 case 0:
651 switch (state) {
652 case 2:
653 memcpy(tptr, hunk_begin, bptr - hunk_begin - 1);
654 tptr += (bptr - hunk_begin - 1);
655 hunk_begin = bptr;
656 goto quit_loop_2;
658 case 1:
659 memcpy(tptr, hunk_begin, bptr - hunk_begin);
660 tptr += (bptr - hunk_begin);
661 hunk_begin = bptr;
662 /* break is omitted intentionally */
663 case 0:
665 if (hunk_begin != line_end) {
666 memcpy(tptr, hunk_begin, bptr - hunk_begin);
667 tptr += (bptr - hunk_begin);
668 hunk_begin = bptr;
670 /* add the embedded line end to the field */
671 memcpy(tptr, line_end, line_end_len);
672 tptr += line_end_len;
674 new_line = readLine(length);
675 const char *new_buf = new_line.data();
676 int64_t new_len = new_line.size();
677 if (new_len == 0) {
678 /* we've got an unterminated enclosure,
679 * assign all the data from the start of
680 * the enclosure to end of data to the
681 * last element */
682 if ((size_t)temp_len > (size_t)(limit - buf)) {
683 goto quit_loop_2;
685 return ret;
687 temp_len += new_len;
688 char *new_temp = (char*)realloc(temp, temp_len);
689 tptr = new_temp + (size_t)(tptr - temp);
690 temp = new_temp;
692 buf_len = new_len;
693 bptr = buf = new_buf;
694 hunk_begin = buf;
696 line_end = limit = (char *)lookup_trailing_spaces(buf, buf_len);
697 line_end_len = buf_len - (size_t)(limit - buf);
698 state = 0;
700 break;
702 break;
703 case 1:
704 /* we need to determine if the enclosure is
705 * 'real' or is it escaped */
706 switch (state) {
707 case 1: /* escaped */
708 bptr++;
709 state = 0;
710 break;
711 case 2: /* embedded enclosure ? let's check it */
712 if (*bptr != enclosure_char) {
713 /* real enclosure */
714 memcpy(tptr, hunk_begin, bptr - hunk_begin - 1);
715 tptr += (bptr - hunk_begin - 1);
716 hunk_begin = bptr;
717 goto quit_loop_2;
719 memcpy(tptr, hunk_begin, bptr - hunk_begin);
720 tptr += (bptr - hunk_begin);
721 bptr++;
722 hunk_begin = bptr;
723 state = 0;
724 break;
725 default:
726 if (*bptr == escape_char) {
727 state = 1;
728 } else if (*bptr == enclosure_char) {
729 state = 2;
731 bptr++;
732 break;
734 break;
736 inc_len = (bptr < limit ? 1 : 0);
739 quit_loop_2:
740 /* look up for a delimiter */
741 for (; bptr < limit; ++bptr) {
742 if (*bptr == delimiter_char) {
743 break;
747 memcpy(tptr, hunk_begin, bptr - hunk_begin);
748 tptr += (bptr - hunk_begin);
749 if (bptr < limit) ++bptr;
750 comp_end = tptr;
751 } else {
752 /* 2B. Handle non-enclosure field */
754 hunk_begin = bptr;
756 for (; bptr < limit; ++bptr) {
757 if (*bptr == delimiter_char) {
758 break;
761 memcpy(tptr, hunk_begin, bptr - hunk_begin);
762 tptr += (bptr - hunk_begin);
764 comp_end = (char *)lookup_trailing_spaces(temp, tptr - temp);
765 if (*bptr == delimiter_char) {
766 bptr++;
770 /* 3. Now pass our field back to php */
771 *comp_end = '\0';
772 ret.append(String(temp, comp_end - temp, CopyString));
773 } while (bptr < limit);
775 free(temp);
776 return ret;
779 String File::getLastError() {
780 return Util::safe_strerror(errno);
784 ///////////////////////////////////////////////////////////////////////////////