Rename IntishCast enum values
[hiphop-php.git] / hphp / runtime / ext / fb / ext_fb.cpp
blob780f41e9b0f5fca838f059964dd601176aeef9fe
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/fb/ext_fb.h"
19 #include <fstream>
21 #include <unicode/uchar.h>
22 #include <unicode/utf8.h>
23 #include <algorithm>
24 #include <memory>
25 #include <utility>
26 #include <vector>
28 #include <folly/String.h>
29 #include <folly/portability/Sockets.h>
31 #include "hphp/util/htonll.h"
32 #include "hphp/util/logger.h"
33 #include "hphp/runtime/base/array-init.h"
34 #include "hphp/runtime/base/builtin-functions.h"
35 #include "hphp/runtime/base/code-coverage.h"
36 #include "hphp/runtime/base/externals.h"
37 #include "hphp/runtime/base/file.h"
38 #include "hphp/runtime/base/file-util.h"
39 #include "hphp/runtime/base/plain-file.h"
40 #include "hphp/runtime/base/unit-cache.h"
41 #include "hphp/runtime/base/intercept.h"
42 #include "hphp/runtime/base/runtime-option.h"
43 #include "hphp/runtime/base/stat-cache.h"
44 #include "hphp/runtime/base/string-buffer.h"
45 #include "hphp/runtime/base/string-util.h"
46 #include "hphp/runtime/base/request-info.h"
47 #include "hphp/runtime/base/tv-type.h"
48 #include "hphp/runtime/ext/std/ext_std_function.h"
49 #include "hphp/runtime/ext/fb/FBSerialize/FBSerialize.h"
50 #include "hphp/runtime/ext/fb/VariantController.h"
51 #include "hphp/runtime/vm/unwind.h"
52 #include "hphp/zend/zend-string.h"
54 namespace HPHP {
// Option bits accepted by fb_serialize() / fb_unserialize().
// Bit 1: use the Hack-array-aware variant controller.
const int64_t k_FB_SERIALIZE_HACK_ARRAYS = 0x2;
59 ///////////////////////////////////////////////////////////////////////////////
61 static const UChar32 SUBSTITUTION_CHARACTER = 0xFFFD;
// Error codes reported by the fb_compact_unserialize family.
// Input value was not a string.
#define FB_UNSERIALIZE_NONSTRING_VALUE           0x0001
// Buffer ended before the current value was complete.
#define FB_UNSERIALIZE_UNEXPECTED_END            0x0002
// Leading byte did not match any known encoding.
#define FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE  0x0003
// Map key was neither an integer nor a string.
#define FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE 0x0004
// HHVM_FACEBOOK is a boolean mirror of whether FACEBOOK is defined at
// compile time.
#if defined(FACEBOOK)
#define HHVM_FACEBOOK true
#else
#define HHVM_FACEBOOK false
#endif
74 ///////////////////////////////////////////////////////////////////////////////
/*
 * Thrift type tags. The numeric values are spelled out explicitly; they
 * run contiguously from 1 (T_STOP) to 17 (T_BOOLEAN).
 */
enum TType {
  T_STOP    = 1,
  T_BYTE    = 2,
  T_U16     = 3,
  T_I16     = 4,
  T_U32     = 5,
  T_I32     = 6,
  T_U64     = 7,
  T_I64     = 8,
  T_STRING  = 9,
  T_STRUCT  = 10,
  T_MAP     = 11,
  T_SET     = 12,
  T_LIST    = 13,
  T_NULL    = 14,
  T_VARCHAR = 15,
  T_DOUBLE  = 16,
  T_BOOLEAN = 17,
};
/*
 * Return the smallest signed integer width, in bytes (1, 2, 4 or 8), that
 * can store the value without loss.
 *
 * The argument is parenthesized inside every cast so that a compound
 * expression such as INT_SIZE(a + b) is narrowed as a whole rather than
 * only its first operand. The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define INT_SIZE(x) (((x) == ((int8_t)(x)))  ? 1 :    \
                     ((x) == ((int16_t)(x))) ? 2 :    \
                     ((x) == ((int32_t)(x))) ? 4 : 8)
/*
 * Return the smallest (supported) unsigned length width, in bytes, that can
 * store the value: 1 when it fits in a uint8_t, otherwise 4.
 *
 * The argument is parenthesized inside both casts so compound expressions
 * are converted as a whole. It is evaluated twice, so avoid side effects.
 */
#define LEN_SIZE(x) ((((unsigned)(x)) == ((uint8_t)(x))) ? 1 : 4)
105 Variant HHVM_FUNCTION(fb_serialize, const Variant& thing, int64_t options) {
106 try {
107 if (options & k_FB_SERIALIZE_HACK_ARRAYS) {
108 size_t len = HPHP::serialize
109 ::FBSerializer<VariantControllerUsingHackArrays>
110 ::serializedSize(thing);
111 String s(len, ReserveString);
112 HPHP::serialize
113 ::FBSerializer<VariantControllerUsingHackArrays>
114 ::serialize(thing, s.mutableData());
115 s.setSize(len);
116 return s;
117 } else {
118 size_t len =
119 HPHP::serialize::FBSerializer<VariantController>::serializedSize(thing);
120 String s(len, ReserveString);
121 HPHP::serialize::FBSerializer<VariantController>::serialize(
122 thing, s.mutableData());
123 s.setSize(len);
124 return s;
126 } catch (const HPHP::serialize::KeysetSerializeError&) {
127 SystemLib::throwInvalidArgumentExceptionObject(
128 "Keysets cannot be serialized with fb_serialize"
130 } catch (const HPHP::serialize::HackArraySerializeError&) {
131 SystemLib::throwInvalidArgumentExceptionObject(
132 "Serializing Hack arrays requires the FB_SERIALIZE_HACK_ARRAYS "
133 "option to be provided"
135 } catch (const HPHP::serialize::SerializeError&) {
136 return init_null();
140 Variant HHVM_FUNCTION(fb_unserialize,
141 const Variant& thing,
142 VRefParam success,
143 int64_t options) {
144 if (thing.isString()) {
145 String sthing = thing.toString();
147 if (sthing.size() && (sthing.data()[0] & 0x80)) {
148 return fb_compact_unserialize(sthing.data(), sthing.size(),
149 success);
150 } else {
151 return fb_unserialize(sthing.data(), sthing.size(), success, options);
155 success.assignIfRef(false);
156 return false;
159 Variant fb_unserialize(const char* str,
160 int len,
161 VRefParam success,
162 int64_t options) {
163 try {
164 if (options & k_FB_SERIALIZE_HACK_ARRAYS) {
165 auto res = HPHP::serialize
166 ::FBUnserializer<VariantControllerUsingHackArrays>
167 ::unserialize(folly::StringPiece(str, len));
168 success.assignIfRef(true);
169 return res;
170 } else {
171 auto res = HPHP::serialize::FBUnserializer<VariantController>
172 ::unserialize(folly::StringPiece(str, len));
173 success.assignIfRef(true);
174 return res;
176 } catch (const HPHP::serialize::UnserializeError&) {
177 success.assignIfRef(false);
178 return false;
182 ///////////////////////////////////////////////////////////////////////////////
185 * FB Compact Serialize
186 * ====================
188 * === Compatibility with fb_unserialize ===
190 * Check the high bit in the first byte of the serialized string.
191 * If it's set, the string is fb_compact_serialize'd, otherwise it's
192 * fb_serialize'd.
194 * === Format ===
196 * A value is serialized as a string <c> <data> where c is a byte (0xf0 | code),
197 * code being one of:
199 * 0 (INT16): data is 2 bytes, network order signed int16
200 * 1 (INT32): data is 4 bytes, network order signed int32
201 * 2 (INT64): data is 8 bytes, network order signed int64
202 * All of these represent an int64 value.
204 * 3 (NULL): no data, null value
206 * 4 (TRUE),
207 * 5 (FALSE): no data, boolean value
209 * 6 (DOUBLE): data is 8 bytes, double value
211 * 7 (STRING_0): no data
212 * 8 (STRING_1): one char of data
213 * 9 (STRING_N): followed by n as a serialized int64, followed by n characters
214 * All of these represent a string value.
216 * 10 (LIST_MAP): followed by serialized values until STOP is seen.
217 * Represents a map with numeric keys 0, 1, ..., n-1 (but see SKIP below).
219 * 11 (MAP): followed by serialized key/value pairs until STOP
220 * is seen. Represents a map with arbitrary int64 or string keys.
222 * 12 (STOP): no data
223 * Marks the end of a LIST or a MAP.
225 * 13 (SKIP): no data
226 * If seen as an entry in a LIST_MAP, the next index in the sequence will
227 * be skipped. E.g. array(0 => 'a', 1 => 'b', 3 => 'c') will be encoded as
228 * (LIST_MAP, 'a', 'b', SKIP, 'c') instead of
229 * (MAP, 0, 'a', 1, 'b', 3, 'c').
231 * 14 (VECTOR): followed by n serialized values until STOP is seen.
232 * Represents a vector of n values.
234 * In addition, if <c> & 0xf0 != 0xf0, most significant bits of <c> mean:
236 * - 0....... 7-bit unsigned int
237 * (NOTE: not used for the sole int value due to the compatibility
238 * requirement above)
239 * - 10...... + 6 more bytes, 54-bit unsigned int
240 * - 110..... + 1 more byte, 13-bit unsigned int
241 * - 1110.... + 2 more bytes, 20-bit unsigned int
243 * All of these represent an int64 value.
/*
 * Type codes of the compact format. On the wire each code occupies the low
 * nibble of a byte whose high nibble is 0xf (see kCodePrefix / kCodeMask).
 */
enum FbCompactSerializeCode {
  // Integers (all decode to int64).
  FB_CS_INT16      = 0,
  FB_CS_INT32      = 1,
  FB_CS_INT64      = 2,
  // Scalars.
  FB_CS_NULL       = 3,
  FB_CS_TRUE       = 4,
  FB_CS_FALSE      = 5,
  FB_CS_DOUBLE     = 6,
  // Strings of length 0, 1 and n.
  FB_CS_STRING_0   = 7,
  FB_CS_STRING_1   = 8,
  FB_CS_STRING_N   = 9,
  // Containers and their control markers.
  FB_CS_LIST_MAP   = 10,
  FB_CS_MAP        = 11,
  FB_CS_STOP       = 12,
  FB_CS_SKIP       = 13,
  FB_CS_VECTOR     = 14,
  FB_CS_OBJ        = 15,
  // One past the largest code.
  FB_CS_MAX_CODE   = 16,
};
// Bit layouts of the variable-width unsigned integer encodings. For each
// width: *Mask selects the payload bits, *PrefixMsbMask / *PrefixMsb
// identify the tag bits in the first byte, and *Prefix is that tag shifted
// to the top byte of the encoded value.

// 1 byte: 0<7 bits>
const uint64_t kInt7Mask            = 0x7f;
const uint64_t kInt7Prefix          = 0x00;

// 2 bytes: 110<13 bits>
const uint64_t kInt13Mask           = 0x1fffULL;
const uint64_t kInt13PrefixMsbMask  = 0xe0;
const uint64_t kInt13PrefixMsb      = 0xc0;
const uint64_t kInt13Prefix         = kInt13PrefixMsb << 8;

// 3 bytes: 1110<20 bits>
const uint64_t kInt20Mask           = 0xfffffULL;
const uint64_t kInt20PrefixMsbMask  = 0xf0;
const uint64_t kInt20PrefixMsb      = 0xe0;
const uint64_t kInt20Prefix         = kInt20PrefixMsb << 16;

// 7 bytes: 10<54 bits>
const uint64_t kInt54Mask           = 0x3fffffffffffffULL;
const uint64_t kInt54PrefixMsbMask  = 0xc0;
const uint64_t kInt54PrefixMsb      = 0x80;
const uint64_t kInt54Prefix         = kInt54PrefixMsb << 48;

// 1 byte: 1111<4 bits>
const uint64_t kCodeMask            = 0x0f;
const uint64_t kCodePrefix          = 0xf0;
292 static void fb_compact_serialize_code(StringBuffer& sb,
293 FbCompactSerializeCode code) {
294 assertx(code == (code & kCodeMask));
295 uint8_t v = (kCodePrefix | code);
296 sb.append(reinterpret_cast<char*>(&v), 1);
299 static void fb_compact_serialize_int64(StringBuffer& sb, int64_t val) {
300 if (val >= 0 && (uint64_t)val <= kInt7Mask) {
301 uint8_t nval = val;
302 sb.append(reinterpret_cast<char*>(&nval), 1);
304 } else if (val >= 0 && (uint64_t)val <= kInt13Mask) {
305 uint16_t nval = htons(kInt13Prefix | val);
306 sb.append(reinterpret_cast<char*>(&nval), 2);
308 } else if (val == (int64_t)(int16_t)val) {
309 fb_compact_serialize_code(sb, FB_CS_INT16);
310 uint16_t nval = htons(val);
311 sb.append(reinterpret_cast<char*>(&nval), 2);
313 } else if (val >= 0 && (uint64_t)val <= kInt20Mask) {
314 uint32_t nval = htonl(kInt20Prefix | val);
315 // Skip most significant byte
316 sb.append(reinterpret_cast<char*>(&nval) + 1, 3);
318 } else if (val == (int64_t)(int32_t)val) {
319 fb_compact_serialize_code(sb, FB_CS_INT32);
320 uint32_t nval = htonl(val);
321 sb.append(reinterpret_cast<char*>(&nval), 4);
323 } else if (val >= 0 && (uint64_t)val <= kInt54Mask) {
324 uint64_t nval = htonll(kInt54Prefix | val);
325 // Skip most significant byte
326 sb.append(reinterpret_cast<char*>(&nval) + 1, 7);
328 } else {
329 fb_compact_serialize_code(sb, FB_CS_INT64);
330 uint64_t nval = htonll(val);
331 sb.append(reinterpret_cast<char*>(&nval), 8);
335 static void fb_compact_serialize_string(StringBuffer& sb, const String& str) {
336 int len = str.size();
337 if (len == 0) {
338 fb_compact_serialize_code(sb, FB_CS_STRING_0);
339 } else {
340 if (len == 1) {
341 fb_compact_serialize_code(sb, FB_CS_STRING_1);
342 } else {
343 fb_compact_serialize_code(sb, FB_CS_STRING_N);
344 fb_compact_serialize_int64(sb, len);
346 sb.append(str.data(), len);
350 static bool fb_compact_serialize_is_list(const Array& arr, int64_t& index_limit) {
351 index_limit = arr.size();
352 int64_t max_index = 0;
353 for (ArrayIter it(arr); it; ++it) {
354 Variant key = it.first();
355 if (!key.isNumeric()) {
356 return false;
358 int64_t index = key.toInt64();
359 if (index < max_index) {
360 return false;
362 if (index > max_index) {
363 max_index = index;
367 if (max_index >= arr.size() * 2) {
368 // Might as well store it as a map
369 return false;
372 index_limit = max_index + 1;
373 return true;
376 static int fb_compact_serialize_variant(
377 StringBuffer& sd, const Variant& var, int depth);
379 static void fb_compact_serialize_array_as_list_map(
380 StringBuffer& sb, const Array& arr, int64_t index_limit, int depth) {
381 fb_compact_serialize_code(sb, FB_CS_LIST_MAP);
382 for (int64_t i = 0; i < index_limit; ++i) {
383 if (arr.exists(i)) {
384 fb_compact_serialize_variant(sb, arr[i], depth + 1);
385 } else {
386 fb_compact_serialize_code(sb, FB_CS_SKIP);
389 fb_compact_serialize_code(sb, FB_CS_STOP);
392 static void fb_compact_serialize_vec(
393 StringBuffer& sb, const Array& arr, int depth) {
394 fb_compact_serialize_code(sb, FB_CS_LIST_MAP);
395 PackedArray::IterateV(
396 arr.get(),
397 [&](TypedValue v) {
398 fb_compact_serialize_variant(sb, VarNR(v), depth + 1);
401 fb_compact_serialize_code(sb, FB_CS_STOP);
404 static void fb_compact_serialize_array_as_map(
405 StringBuffer& sb, const Array& arr, int depth) {
406 fb_compact_serialize_code(sb, FB_CS_MAP);
407 IterateKV(
408 arr.get(),
409 [&](Cell k, TypedValue v) {
410 if (isStringType(k.m_type)) {
411 fb_compact_serialize_string(sb, StrNR{k.m_data.pstr});
412 } else {
413 assertx(isIntType(k.m_type));
414 fb_compact_serialize_int64(sb, k.m_data.num);
416 fb_compact_serialize_variant(sb, VarNR(v), depth + 1);
419 fb_compact_serialize_code(sb, FB_CS_STOP);
422 static void fb_compact_serialize_keyset(
423 StringBuffer& sb, const Array& arr) {
424 fb_compact_serialize_code(sb, FB_CS_MAP);
425 SetArray::Iterate(
426 SetArray::asSet(arr.get()),
427 [&](TypedValue v) {
428 if (isStringType(v.m_type)) {
429 fb_compact_serialize_string(sb, StrNR{v.m_data.pstr});
430 fb_compact_serialize_string(sb, StrNR{v.m_data.pstr});
431 } else {
432 assertx(v.m_type == KindOfInt64);
433 fb_compact_serialize_int64(sb, v.m_data.num);
434 fb_compact_serialize_int64(sb, v.m_data.num);
438 fb_compact_serialize_code(sb, FB_CS_STOP);
441 static int fb_compact_serialize_variant(
442 StringBuffer& sb, const Variant& var, int depth) {
443 if (depth > 256) {
444 return 1;
447 switch (var.getType()) {
448 case KindOfUninit:
449 case KindOfNull:
450 fb_compact_serialize_code(sb, FB_CS_NULL);
451 return 0;
453 case KindOfBoolean:
454 if (var.toInt64()) {
455 fb_compact_serialize_code(sb, FB_CS_TRUE);
456 } else {
457 fb_compact_serialize_code(sb, FB_CS_FALSE);
459 return 0;
461 case KindOfInt64:
462 fb_compact_serialize_int64(sb, var.toInt64());
463 return 0;
465 case KindOfDouble: {
466 fb_compact_serialize_code(sb, FB_CS_DOUBLE);
467 double d = var.toDouble();
468 sb.append(reinterpret_cast<char*>(&d), 8);
469 return 0;
472 case KindOfPersistentString:
473 case KindOfString:
474 case KindOfFunc:
475 case KindOfClass:
476 fb_compact_serialize_string(sb, var.toString());
477 return 0;
479 case KindOfPersistentVec:
480 case KindOfVec: {
481 Array arr = var.toArray();
482 assertx(arr->isVecArray());
483 fb_compact_serialize_vec(sb, std::move(arr), depth);
484 return 0;
487 case KindOfPersistentDict:
488 case KindOfDict: {
489 Array arr = var.toArray();
490 assertx(arr->isDict());
491 fb_compact_serialize_array_as_map(sb, std::move(arr), depth);
492 return 0;
495 case KindOfPersistentKeyset:
496 case KindOfKeyset: {
497 Array arr = var.toArray();
498 assertx(arr->isKeyset());
499 fb_compact_serialize_keyset(sb, std::move(arr));
500 return 0;
503 case KindOfPersistentShape:
504 case KindOfShape: { // TODO(T31134050)
505 Array arr = var.toArray();
506 assertx(arr->isDictOrDArray());
507 fb_compact_serialize_array_as_map(sb, std::move(arr), depth);
508 return 0;
511 case KindOfPersistentArray:
512 case KindOfArray: {
513 Array arr = var.toArray();
514 assertx(arr->isPHPArray());
515 int64_t index_limit;
516 if (fb_compact_serialize_is_list(arr, index_limit)) {
517 fb_compact_serialize_array_as_list_map(
518 sb, std::move(arr), index_limit, depth);
519 } else {
520 fb_compact_serialize_array_as_map(sb, std::move(arr), depth);
522 return 0;
525 case KindOfClsMeth: {
526 Array arr = var.toArray();
527 if (RuntimeOption::EvalHackArrDVArrs) {
528 assertx(arr->isVecArray());
529 fb_compact_serialize_vec(sb, std::move(arr), depth);
530 } else {
531 assertx(arr->isPHPArray());
532 int64_t index_limit;
533 fb_compact_serialize_is_list(arr, index_limit);
534 fb_compact_serialize_array_as_list_map(
535 sb, std::move(arr), index_limit, depth);
537 return 0;
540 case KindOfObject:
541 case KindOfResource:
542 case KindOfRef:
543 fb_compact_serialize_code(sb, FB_CS_NULL);
544 raise_warning(
545 "fb_compact_serialize(): unable to serialize "
546 "object/resource/ref/func/class"
548 break;
551 return 1;
554 String fb_compact_serialize(const Variant& thing) {
556 * If thing is a single int value [0, 127] normally we would serialize
557 * it as a single byte (7 bit unsigned int).
559 * However, we want highest bit of the first byte to always be set so
560 * that we can tell if the string is fb_serialize'd or fb_compact_serialize'd.
562 * So we force to serialize it as 13 bit unsigned int instead.
564 if (thing.getType() == KindOfInt64) {
565 int64_t val = thing.toInt64();
566 if (val >= 0 && (uint64_t)val <= kInt7Mask) {
567 String s(2, ReserveString);
568 *(uint16_t*)(s.mutableData()) = (uint16_t)htons(kInt13Prefix | val);
569 s.setSize(2);
570 return s;
574 StringBuffer sb;
575 if (fb_compact_serialize_variant(sb, thing, 0)) {
576 return String();
579 return sb.detach();
582 Variant HHVM_FUNCTION(fb_compact_serialize, const Variant& thing) {
583 return fb_compact_serialize(thing);
/*
 * Check that `bytes` more bytes are available in a buffer of `num` bytes
 * when the cursor is at `pos`; otherwise return
 * FB_UNSERIALIZE_UNEXPECTED_END from the enclosing function.
 *
 * The comparison is done in 64 bits and a negative request is rejected:
 * `bytes` may be a length decoded from untrusted input, and narrowing it
 * to int would let a negative or > 32-bit value slip past the check.
 */
#define CHECK_ENOUGH(bytes, pos, num) do {                                  \
    const int64_t check_enough_want_ = (int64_t)(bytes);                    \
    const int64_t check_enough_have_ = (int64_t)(num) - (int64_t)(pos);     \
    if (check_enough_want_ < 0 ||                                           \
        check_enough_want_ > check_enough_have_) {                          \
      return FB_UNSERIALIZE_UNEXPECTED_END;                                 \
    }                                                                       \
  } while (0)
594 int fb_compact_unserialize_int64_from_buffer(
595 int64_t& out, const char* buf, int n, int& p) {
597 CHECK_ENOUGH(1, p, n);
598 uint64_t first = (unsigned char)buf[p];
599 if ((first & ~kInt7Mask) == kInt7Prefix) {
600 p += 1;
601 out = first & kInt7Mask;
603 } else if ((first & kInt13PrefixMsbMask) == kInt13PrefixMsb) {
604 CHECK_ENOUGH(2, p, n);
605 uint16_t val = (uint16_t)ntohs(*reinterpret_cast<const uint16_t*>(buf + p));
606 p += 2;
607 out = val & kInt13Mask;
609 } else if (first == (kCodePrefix | FB_CS_INT16)) {
610 p += 1;
611 CHECK_ENOUGH(2, p, n);
612 int16_t val = (int16_t)ntohs(*reinterpret_cast<const int16_t*>(buf + p));
613 p += 2;
614 out = val;
616 } else if ((first & kInt20PrefixMsbMask) == kInt20PrefixMsb) {
617 CHECK_ENOUGH(3, p, n);
618 uint32_t b = 0;
619 memcpy(&b, buf + p, 3);
620 uint32_t val = ntohl(b);
621 p += 3;
622 out = (val >> 8) & kInt20Mask;
624 } else if (first == (kCodePrefix | FB_CS_INT32)) {
625 p += 1;
626 CHECK_ENOUGH(4, p, n);
627 int32_t val = (int32_t)ntohl(*reinterpret_cast<const int32_t*>(buf + p));
628 p += 4;
629 out = val;
631 } else if ((first & kInt54PrefixMsbMask) == kInt54PrefixMsb) {
632 CHECK_ENOUGH(7, p, n);
633 uint64_t b = 0;
634 memcpy(&b, buf + p, 7);
635 uint64_t val = ntohll(b);
636 p += 7;
637 out = (val >> 8) & kInt54Mask;
639 } else if (first == (kCodePrefix | FB_CS_INT64)) {
640 p += 1;
641 CHECK_ENOUGH(8, p, n);
642 int64_t val = (int64_t)ntohll(*reinterpret_cast<const int64_t*>(buf + p));
643 p += 8;
644 out = val;
646 } else {
647 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE;
650 return 0;
653 const StaticString s_empty("");
655 int fb_compact_unserialize_from_buffer(
656 Variant& out, const char* buf, int n, int& p) {
658 CHECK_ENOUGH(1, p, n);
659 int code = (unsigned char)buf[p];
660 if ((code & ~kCodeMask) != kCodePrefix ||
661 (code & kCodeMask) == FB_CS_INT16 ||
662 (code & kCodeMask) == FB_CS_INT32 ||
663 (code & kCodeMask) == FB_CS_INT64) {
665 int64_t val;
666 int err = fb_compact_unserialize_int64_from_buffer(val, buf, n, p);
667 if (err) {
668 return err;
670 out = (int64_t)val;
671 return 0;
673 p += 1;
674 code &= kCodeMask;
675 switch (code) {
676 case FB_CS_NULL:
677 out = uninit_null();
678 break;
680 case FB_CS_TRUE:
681 out = true;
682 break;
684 case FB_CS_FALSE:
685 out = false;
686 break;
688 case FB_CS_DOUBLE:
690 CHECK_ENOUGH(8, p, n);
691 double d = *reinterpret_cast<const double*>(buf + p);
692 p += 8;
693 out = d;
694 break;
697 case FB_CS_STRING_0:
699 out = s_empty;
700 break;
703 case FB_CS_STRING_1:
704 case FB_CS_STRING_N:
706 int64_t len = 1;
707 if (code == FB_CS_STRING_N) {
708 int err = fb_compact_unserialize_int64_from_buffer(len, buf, n, p);
709 if (err) {
710 return err;
714 CHECK_ENOUGH(len, p, n);
715 out = Variant::attach(StringData::Make(buf + p, len, CopyString));
716 p += len;
717 break;
720 case FB_CS_VECTOR:
722 Array arr = Array::Create();
723 int64_t i = 0;
724 bool should_log_skip =
725 RuntimeOption::EvalHackArrCompatCompactSerializeNotices;
726 while (p < n && buf[p] != (char)(kCodePrefix | FB_CS_STOP)) {
727 if (buf[p] == (char)(kCodePrefix | FB_CS_SKIP)) {
728 if (UNLIKELY(should_log_skip)) {
729 should_log_skip = false;
730 raise_hackarr_compat_notice(
731 "fb_compact_unserialize(): vector cannot contain skip");
733 ++i;
734 ++p;
735 continue;
737 Variant value;
738 int err = fb_compact_unserialize_from_buffer(value, buf, n, p);
739 if (err) {
740 return err;
742 arr.set(i++, value);
745 // Consume STOP
746 CHECK_ENOUGH(1, p, n);
747 p += 1;
749 out = arr;
750 break;
753 case FB_CS_LIST_MAP:
755 Array arr = Array::CreateDArray();
756 int64_t i = 0;
757 while (p < n && buf[p] != (char)(kCodePrefix | FB_CS_STOP)) {
758 if (buf[p] == (char)(kCodePrefix | FB_CS_SKIP)) {
759 ++i;
760 ++p;
761 } else {
762 Variant value;
763 int err = fb_compact_unserialize_from_buffer(value, buf, n, p);
764 if (err) {
765 return err;
767 arr.set(i++, value);
771 // Consume STOP
772 CHECK_ENOUGH(1, p, n);
773 p += 1;
775 out = arr;
776 break;
779 case FB_CS_MAP:
781 Array arr = Array::CreateDArray();
782 while (p < n && buf[p] != (char)(kCodePrefix | FB_CS_STOP)) {
783 Variant key;
784 int err = fb_compact_unserialize_from_buffer(key, buf, n, p);
785 if (err) {
786 return err;
788 Variant value;
789 err = fb_compact_unserialize_from_buffer(value, buf, n, p);
790 if (err) {
791 return err;
793 if (key.getType() == KindOfInt64) {
794 arr.set(key.toInt64(), value);
795 } else if (key.getType() == KindOfString ||
796 key.getType() == KindOfPersistentString) {
797 const auto arrkey = arr.convertKey<IntishCast::Cast>(key);
798 arr.set(arrkey, *value.asTypedValue());
799 } else {
800 return FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE;
804 // Consume STOP
805 CHECK_ENOUGH(1, p, n);
806 p += 1;
808 out = arr;
809 break;
812 default:
813 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE;
816 return 0;
819 Variant fb_compact_unserialize(const char* str, int len,
820 VRefParam success,
821 VRefParam errcode /* = uninit_variant */) {
823 Variant ret;
824 int p = 0;
825 int err = fb_compact_unserialize_from_buffer(ret, str, len, p);
826 if (err) {
827 success.assignIfRef(false);
828 errcode.assignIfRef(err);
829 return false;
831 success.assignIfRef(true);
832 errcode.assignIfRef(init_null());
833 return ret;
836 Variant HHVM_FUNCTION(fb_compact_unserialize,
837 const Variant& thing, VRefParam success,
838 VRefParam errcode /* = uninit_variant */) {
839 if (!thing.isString()) {
840 success.assignIfRef(false);
841 errcode.assignIfRef(FB_UNSERIALIZE_NONSTRING_VALUE);
842 return false;
845 String s = thing.toString();
846 return fb_compact_unserialize(s.data(), s.size(), ref(success),
847 ref(errcode));
850 ///////////////////////////////////////////////////////////////////////////////
852 bool HHVM_FUNCTION(fb_utf8ize, VRefParam input) {
853 String s = input.toString();
854 const char* const srcBuf = s.data();
855 int32_t srcLenBytes = s.size();
857 if (s.size() < 0 || s.size() > INT_MAX) {
858 return false; // Too long.
861 // Preflight to avoid allocation if the entire input is valid.
862 int32_t srcPosBytes;
863 for (srcPosBytes = 0; srcPosBytes < srcLenBytes; /* U8_NEXT increments */) {
864 // This is lame, but gcc doesn't optimize U8_NEXT very well
865 if (srcBuf[srcPosBytes] != 0 && !(srcBuf[srcPosBytes] & 0x80)) {
866 srcPosBytes++; // U8_NEXT would increment this
867 continue;
869 UChar32 curCodePoint;
870 // U8_NEXT() always advances srcPosBytes; save in case curCodePoint invalid
871 int32_t savedSrcPosBytes = srcPosBytes;
872 U8_NEXT(srcBuf, srcPosBytes, srcLenBytes, curCodePoint);
873 if (curCodePoint <= 0) {
874 // curCodePoint invalid; back up so we'll fix it in the loop below.
875 srcPosBytes = savedSrcPosBytes;
876 break;
880 if (srcPosBytes == srcLenBytes) {
881 // it's all valid
882 return true;
885 // There are invalid bytes. Allocate memory, then copy the input, replacing
886 // invalid sequences with either the substitution character or nothing,
887 // depending on the value of RuntimeOption::Utf8izeReplace.
889 // Worst case, every remaining byte is invalid, taking a 3-byte substitution.
890 int32_t bytesRemaining = srcLenBytes - srcPosBytes;
891 uint64_t dstMaxLenBytes = srcPosBytes + (RuntimeOption::Utf8izeReplace ?
892 bytesRemaining * U8_LENGTH(SUBSTITUTION_CHARACTER) :
893 bytesRemaining);
894 if (dstMaxLenBytes > INT_MAX) {
895 return false; // Too long.
897 String dstStr(dstMaxLenBytes, ReserveString);
898 char *dstBuf = dstStr.mutableData();
900 // Copy valid bytes found so far as one solid block.
901 memcpy(dstBuf, srcBuf, srcPosBytes);
903 // Iterate through the remaining bytes.
904 int32_t dstPosBytes = srcPosBytes; // already copied srcPosBytes
905 for (/* already init'd */; srcPosBytes < srcLenBytes; /* see U8_NEXT */) {
906 UChar32 curCodePoint;
907 // This is lame, but gcc doesn't optimize U8_NEXT very well
908 if (srcBuf[srcPosBytes] != 0 && !(srcBuf[srcPosBytes] & 0x80)) {
909 curCodePoint = srcBuf[srcPosBytes++]; // U8_NEXT would increment
910 } else {
911 U8_NEXT(srcBuf, srcPosBytes, srcLenBytes, curCodePoint);
913 if (curCodePoint <= 0) {
914 // Invalid UTF-8 sequence.
915 // N.B. We consider a null byte an invalid sequence.
916 if (!RuntimeOption::Utf8izeReplace) {
917 continue; // Omit invalid sequence
919 curCodePoint = SUBSTITUTION_CHARACTER; // Replace invalid sequences
921 // We know that resultBuffer > total possible length.
922 U8_APPEND_UNSAFE(dstBuf, dstPosBytes, curCodePoint);
924 assertx(dstPosBytes <= dstMaxLenBytes);
925 input.assignIfRef(dstStr.shrink(dstPosBytes));
926 return true;
930 * Private utf8_strlen implementation.
932 * Returns count of code points in input, substituting 1 code point per invalid
933 * sequence.
935 * deprecated=true: instead return byte count on invalid UTF-8 sequence.
937 static int fb_utf8_strlen_impl(const String& input, bool deprecated) {
938 // Count, don't modify.
939 int32_t sourceLength = input.size();
940 const char* const sourceBuffer = input.data();
941 int64_t num_code_points = 0;
943 for (int32_t sourceOffset = 0; sourceOffset < sourceLength; ) {
944 UChar32 sourceCodePoint;
945 // U8_NEXT() is guaranteed to advance sourceOffset by 1-4 each time it's
946 // invoked.
947 U8_NEXT(sourceBuffer, sourceOffset, sourceLength, sourceCodePoint);
948 if (deprecated && sourceCodePoint < 0) {
949 return sourceLength; // return byte count on invalid sequence
951 num_code_points++;
953 return num_code_points;
956 int64_t HHVM_FUNCTION(fb_utf8_strlen, const String& input) {
957 return fb_utf8_strlen_impl(input, /* deprecated */ false);
960 int64_t HHVM_FUNCTION(fb_utf8_strlen_deprecated, const String& input) {
961 return fb_utf8_strlen_impl(input, /* deprecated */ true);
965 * Private helper; requires non-negative firstCodePoint and desiredCodePoints.
967 static String fb_utf8_substr_simple(const String& str,
968 int32_t firstCodePoint,
969 int32_t numDesiredCodePoints) {
970 const char* const srcBuf = str.data();
971 int32_t srcLenBytes = str.size(); // May truncate; checked before use below.
973 assertx(firstCodePoint >= 0); // Wrapper fixes up negative starting positions.
974 assertx(numDesiredCodePoints > 0); // Wrapper fixes up negative/zero length.
975 if (str.size() <= 0 ||
976 str.size() > INT_MAX ||
977 firstCodePoint >= srcLenBytes) {
978 return empty_string();
981 // Cannot be more code points than bytes in input. This typically reduces
982 // the INT_MAX default value to something more reasonable.
983 numDesiredCodePoints = std::min(numDesiredCodePoints,
984 srcLenBytes - firstCodePoint);
986 // Pre-allocate the result.
987 // the worst case can come from one of two sources:
988 // - every code point could be the substitution char (3 bytes)
989 // giving us numDesiredCodePoints * 3
990 // - every code point could be 4 bytes long, giving us
991 // numDesiredCodePoints * 4 - but capped by the length of the input
992 uint64_t dstMaxLenBytes =
993 std::min((uint64_t)numDesiredCodePoints * 4,
994 (uint64_t)srcLenBytes - firstCodePoint);
995 dstMaxLenBytes = std::max(dstMaxLenBytes,
996 (uint64_t)numDesiredCodePoints *
997 U8_LENGTH(SUBSTITUTION_CHARACTER));
998 if (dstMaxLenBytes > INT_MAX) {
999 return empty_string(); // Too long.
1001 String dstStr(dstMaxLenBytes, ReserveString);
1002 char* dstBuf = dstStr.mutableData();
1003 int32_t dstPosBytes = 0;
1005 // Iterate through src's codepoints; srcPosBytes is incremented by U8_NEXT.
1006 for (int32_t srcPosBytes = 0, srcPosCodePoints = 0;
1007 srcPosBytes < srcLenBytes && // more available
1008 srcPosCodePoints < firstCodePoint + numDesiredCodePoints; // want more
1009 srcPosCodePoints++) {
1011 // U8_NEXT() advances sourceBytePos by 1-4 each time it's invoked.
1012 UChar32 curCodePoint;
1013 U8_NEXT(srcBuf, srcPosBytes, srcLenBytes, curCodePoint);
1015 if (srcPosCodePoints >= firstCodePoint) {
1016 // Copy this code point into the result.
1017 if (curCodePoint < 0) {
1018 curCodePoint = SUBSTITUTION_CHARACTER; // replace invalid sequences
1020 // We know that resultBuffer > total possible length.
1021 // U8_APPEND_UNSAFE updates dstPosBytes.
1022 U8_APPEND_UNSAFE(dstBuf, dstPosBytes, curCodePoint);
1026 assertx(dstPosBytes <= dstMaxLenBytes);
1027 if (dstPosBytes > 0) {
1028 dstStr.shrink(dstPosBytes);
1029 return dstStr;
1031 return empty_string();
1034 String HHVM_FUNCTION(fb_utf8_substr, const String& str, int64_t start,
1035 int64_t length /* = INT_MAX */) {
1036 if (length > INT_MAX) {
1037 length = INT_MAX;
1039 // For negative start or length, calculate start and length values
1040 // based on total code points.
1041 if (start < 0 || length < 0) {
1042 // Get number of code points assuming we substitute invalid sequences.
1043 Variant utf8StrlenResult = HHVM_FN(fb_utf8_strlen)(str);
1044 int32_t sourceNumCodePoints = utf8StrlenResult.toInt32();
1046 if (start < 0) {
1047 // Negative means first character is start'th code point from end.
1048 // e.g., -1 means start with the last code point.
1049 start = sourceNumCodePoints + start; // adding negative start
1051 if (length < 0) {
1052 // Negative means omit last abs(length) code points.
1053 length = sourceNumCodePoints - start + length; // adding negative length
1056 if (start < 0 || length <= 0) {
1057 return empty_string(); // Empty result
1060 return fb_utf8_substr_simple(str, start, length);
1063 ///////////////////////////////////////////////////////////////////////////////
1065 bool HHVM_FUNCTION(fb_intercept, const String& name, const Variant& handler,
1066 const Variant& data /* = uninit_variant */) {
1067 return register_intercept(name, handler, data, true);
1070 bool HHVM_FUNCTION(fb_rename_function, const String& orig_func_name,
1071 const String& new_func_name) {
1072 if (orig_func_name.empty() || new_func_name.empty() ||
1073 orig_func_name.get()->isame(new_func_name.get())) {
1074 throw_invalid_argument("unable to rename %s", orig_func_name.data());
1075 return false;
1078 if (!function_exists(orig_func_name)) {
1079 raise_warning("fb_rename_function(%s, %s) failed: %s does not exist!",
1080 orig_func_name.data(), new_func_name.data(),
1081 orig_func_name.data());
1082 return false;
1085 if (function_exists(new_func_name)) {
1086 if (new_func_name.data()[0] != '1') {
1087 raise_warning("fb_rename_function(%s, %s) failed: %s already exists!",
1088 orig_func_name.data(), new_func_name.data(),
1089 new_func_name.data());
1090 return false;
1094 rename_function(orig_func_name, new_func_name);
1095 return true;
1098 ///////////////////////////////////////////////////////////////////////////////
1100 Variant HHVM_FUNCTION(fb_get_code_coverage, bool flush) {
1101 RequestInfo *ti = RequestInfo::s_requestInfo.getNoCheck();
1102 if (ti->m_reqInjectionData.getCoverage()) {
1103 Array ret = ti->m_coverage->Report();
1104 if (flush) {
1105 ti->m_coverage->Reset();
1107 return ret;
1109 return false;
1112 void HHVM_FUNCTION(fb_enable_code_coverage) {
1113 RequestInfo *ti = RequestInfo::s_requestInfo.getNoCheck();
1114 ti->m_coverage->Reset();
1115 ti->m_reqInjectionData.setCoverage(true);
1116 if (g_context->isNested()) {
1117 raise_notice("Calling fb_enable_code_coverage from a nested "
1118 "VM instance may cause unpredicable results");
1120 throw VMSwitchModeBuiltin();
1123 Array disable_code_coverage_helper(bool report_frequency) {
1124 RequestInfo *ti = RequestInfo::s_requestInfo.getNoCheck();
1125 ti->m_reqInjectionData.setCoverage(false);
1126 auto ret = ti->m_coverage->Report(report_frequency);
1127 ti->m_coverage->Reset();
1128 return ret;
1131 Array HHVM_FUNCTION(fb_disable_code_coverage) {
1132 return disable_code_coverage_helper(/* report frequency */ false);
1135 Array HHVM_FUNCTION(HH_disable_code_coverage_with_frequency) {
1136 return disable_code_coverage_helper(/* report frequency */ true);
1139 ///////////////////////////////////////////////////////////////////////////////
1141 bool HHVM_FUNCTION(fb_output_compression, bool new_value) {
1142 Transport *transport = g_context->getTransport();
1143 if (transport) {
1144 bool rv = transport->isCompressionEnabled();
1145 if (new_value) {
1146 transport->enableCompression();
1147 } else {
1148 transport->disableCompression();
1150 return rv;
1152 return false;
1155 void HHVM_FUNCTION(fb_set_exit_callback, const Variant& function) {
1156 g_context->setExitCallback(function);
1159 const StaticString
1160 s_flush_stats("flush_stats"),
1161 s_chunk_stats("chunk_stats"),
1162 s_total("total"),
1163 s_sent("sent"),
1164 s_time("time");
1166 int64_t HHVM_FUNCTION(fb_get_last_flush_size) {
1167 Transport *transport = g_context->getTransport();
1168 return transport ? transport->getLastChunkSentSize() : 0;
1171 extern Array stat_impl(struct stat*); // ext_file.cpp
1173 template<class Function>
1174 static Variant do_lazy_stat(Function dostat, const String& filename) {
1175 struct stat sb;
1176 if (dostat(File::TranslatePathWithFileCache(filename).c_str(), &sb)) {
1177 Logger::Verbose("%s/%d: %s", __FUNCTION__, __LINE__,
1178 folly::errnoStr(errno).c_str());
1179 return false;
1181 return stat_impl(&sb);
1184 Variant HHVM_FUNCTION(fb_lazy_lstat, const String& filename) {
1185 if (!FileUtil::checkPathAndWarn(filename, __FUNCTION__ + 2, 1)) {
1186 return false;
1188 return do_lazy_stat(StatCache::lstat, filename);
1191 Variant HHVM_FUNCTION(fb_lazy_realpath, const String& filename) {
1192 if (!FileUtil::checkPathAndWarn(filename, __FUNCTION__ + 2, 1)) {
1193 return false;
1196 return StatCache::realpath(filename.c_str());
1199 int64_t HHVM_FUNCTION(HH_non_crypto_md5_upper, StringArg str) {
1200 Md5Digest md5(str.get()->data(), str.get()->size());
1201 int64_t pre_decode;
1202 // Work around "strict aliasing" with memcpy
1203 memcpy(&pre_decode, md5.digest, sizeof(pre_decode));
1204 // When PHP/Hack users decode MD5 hex, they treat it as big endian.
1205 // Replicate that here.
1206 return folly::Endian::big(pre_decode);
1209 int64_t HHVM_FUNCTION(HH_non_crypto_md5_lower, StringArg str) {
1210 Md5Digest md5(str.get()->data(), str.get()->size());
1211 int64_t pre_decode;
1212 // Work around "strict aliasing" with memcpy
1213 memcpy(&pre_decode, md5.digest + 8, sizeof(pre_decode));
1214 // When PHP/Hack users decode MD5 hex, they treat it as big endian.
1215 // Replicate that here.
1216 return folly::Endian::big(pre_decode);
1219 ///////////////////////////////////////////////////////////////////////////////
1221 EXTERNALLY_VISIBLE
1222 void const_load() {
1223 // TODO(8117903): Unused; remove after updating www side.
1226 ///////////////////////////////////////////////////////////////////////////////
1228 struct FBExtension : Extension {
1229 FBExtension(): Extension("fb", "1.0.0") {}
1231 void moduleInit() override {
1232 HHVM_RC_BOOL_SAME(HHVM_FACEBOOK);
1233 HHVM_RC_BOOL(HHVM_NO_DESTRUCTORS, one_bit_refcount);
1234 HHVM_RC_INT_SAME(FB_UNSERIALIZE_NONSTRING_VALUE);
1235 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_END);
1236 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE);
1237 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE);
1239 HHVM_RC_INT(FB_SERIALIZE_HACK_ARRAYS, k_FB_SERIALIZE_HACK_ARRAYS);
1241 HHVM_FE(fb_serialize);
1242 HHVM_FE(fb_unserialize);
1243 HHVM_FE(fb_compact_serialize);
1244 HHVM_FE(fb_compact_unserialize);
1245 HHVM_FE(fb_utf8ize);
1246 HHVM_FE(fb_utf8_strlen);
1247 HHVM_FE(fb_utf8_strlen_deprecated);
1248 HHVM_FE(fb_utf8_substr);
1249 HHVM_FE(fb_intercept);
1250 HHVM_FE(fb_rename_function);
1251 HHVM_FE(fb_get_code_coverage);
1252 HHVM_FE(fb_enable_code_coverage);
1253 HHVM_FE(fb_disable_code_coverage);
1254 HHVM_FE(fb_output_compression);
1255 HHVM_FE(fb_set_exit_callback);
1256 HHVM_FE(fb_get_last_flush_size);
1257 HHVM_FE(fb_lazy_lstat);
1258 HHVM_FE(fb_lazy_realpath);
1260 HHVM_FALIAS(HH\\disable_code_coverage_with_frequency,
1261 HH_disable_code_coverage_with_frequency);
1262 HHVM_FALIAS(HH\\non_crypto_md5_upper, HH_non_crypto_md5_upper);
1263 HHVM_FALIAS(HH\\non_crypto_md5_lower, HH_non_crypto_md5_lower);
1265 loadSystemlib();
1267 } s_fb_extension;
1269 ///////////////////////////////////////////////////////////////////////////////