Fix memoization of functions called with keyset arguments
[hiphop-php.git] / hphp / runtime / ext / fb / ext_fb.cpp
blob3310f603f3aa0fa07f4c4842fab91dab518fdf99
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2016 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/fb/ext_fb.h"
19 #include <fstream>
21 #include <unicode/uchar.h>
22 #include <unicode/utf8.h>
23 #include <algorithm>
24 #include <memory>
25 #include <utility>
26 #include <vector>
28 #include <folly/String.h>
29 #include <folly/portability/Sockets.h>
31 #include "hphp/util/htonll.h"
32 #include "hphp/util/logger.h"
33 #include "hphp/runtime/base/array-init.h"
34 #include "hphp/runtime/base/builtin-functions.h"
35 #include "hphp/runtime/base/code-coverage.h"
36 #include "hphp/runtime/base/externals.h"
37 #include "hphp/runtime/base/file.h"
38 #include "hphp/runtime/base/file-util.h"
39 #include "hphp/runtime/base/plain-file.h"
40 #include "hphp/runtime/base/unit-cache.h"
41 #include "hphp/runtime/base/intercept.h"
42 #include "hphp/runtime/base/runtime-option.h"
43 #include "hphp/runtime/base/stat-cache.h"
44 #include "hphp/runtime/base/string-buffer.h"
45 #include "hphp/runtime/base/string-util.h"
46 #include "hphp/runtime/base/thread-info.h"
47 #include "hphp/runtime/ext/std/ext_std_function.h"
48 #include "hphp/runtime/ext/fb/FBSerialize/FBSerialize.h"
49 #include "hphp/runtime/ext/fb/VariantController.h"
50 #include "hphp/runtime/vm/unwind.h"
52 #include "hphp/parser/parser.h"
54 namespace HPHP {
56 // fb_serialize options
57 const int64_t k_FB_SERIALIZE_HACK_ARRAYS = 1<<1;
59 ///////////////////////////////////////////////////////////////////////////////
61 static const UChar32 SUBSTITUTION_CHARACTER = 0xFFFD;
63 #define FB_UNSERIALIZE_NONSTRING_VALUE 0x0001
64 #define FB_UNSERIALIZE_UNEXPECTED_END 0x0002
65 #define FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE 0x0003
66 #define FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE 0x0004
68 #ifdef FACEBOOK
69 # define HHVM_FACEBOOK true
70 #else
71 # define HHVM_FACEBOOK false
72 #endif
74 ///////////////////////////////////////////////////////////////////////////////
75 // static strings
77 const StaticString
78 s_IMemoizeParam("HH\\IMemoizeParam"),
79 s_getInstanceKey("getInstanceKey");
81 ///////////////////////////////////////////////////////////////////////////////
83 /* enum of thrift types */
84 enum TType {
85 T_STOP = 1,
86 T_BYTE = 2,
87 T_U16 = 3,
88 T_I16 = 4,
89 T_U32 = 5,
90 T_I32 = 6,
91 T_U64 = 7,
92 T_I64 = 8,
93 T_STRING = 9,
94 T_STRUCT = 10,
95 T_MAP = 11,
96 T_SET = 12,
97 T_LIST = 13,
98 T_NULL = 14,
99 T_VARCHAR = 15,
100 T_DOUBLE = 16,
101 T_BOOLEAN = 17,
/*
 * Return the smallest integer width (in bytes: 1, 2, 4 or 8) that can store
 * the value. The argument is parenthesised before every cast so that a
 * compound expression such as INT_SIZE(a + b) is converted as a whole rather
 * than having the cast bind to its first operand only.
 */
#define INT_SIZE(x) (((x) == ((int8_t)(x))) ? 1 :   \
                     ((x) == ((int16_t)(x))) ? 2 :  \
                     ((x) == ((int32_t)(x))) ? 4 : 8)

/*
 * Return the smallest (supported) unsigned length width (1 or 4 bytes) that
 * can store the value. The argument is parenthesised for the same reason as
 * in INT_SIZE.
 */
#define LEN_SIZE(x) ((((unsigned)(x)) == ((uint8_t)(x))) ? 1 : 4)
112 Variant HHVM_FUNCTION(fb_serialize, const Variant& thing, int64_t options) {
113 try {
114 if (options & k_FB_SERIALIZE_HACK_ARRAYS) {
115 size_t len = HPHP::serialize
116 ::FBSerializer<VariantControllerUsingHackArrays>
117 ::serializedSize(thing);
118 String s(len, ReserveString);
119 HPHP::serialize
120 ::FBSerializer<VariantControllerUsingHackArrays>
121 ::serialize(thing, s.mutableData());
122 s.setSize(len);
123 return s;
124 } else {
125 size_t len =
126 HPHP::serialize::FBSerializer<VariantController>::serializedSize(thing);
127 String s(len, ReserveString);
128 HPHP::serialize::FBSerializer<VariantController>::serialize(
129 thing, s.mutableData());
130 s.setSize(len);
131 return s;
133 } catch (const HPHP::serialize::KeysetSerializeError&) {
134 SystemLib::throwInvalidArgumentExceptionObject(
135 "Keysets cannot be serialized with fb_serialize"
137 } catch (const HPHP::serialize::HackArraySerializeError&) {
138 SystemLib::throwInvalidArgumentExceptionObject(
139 "Serializing Hack arrays requires the FB_SERIALIZE_HACK_ARRAYS "
140 "option to be provided"
142 } catch (const HPHP::serialize::SerializeError&) {
143 return init_null();
147 Variant HHVM_FUNCTION(fb_unserialize,
148 const Variant& thing,
149 VRefParam success,
150 int64_t options) {
151 if (thing.isString()) {
152 String sthing = thing.toString();
154 if (sthing.size() && (sthing.data()[0] & 0x80)) {
155 return fb_compact_unserialize(sthing.data(), sthing.size(),
156 success);
157 } else {
158 return fb_unserialize(sthing.data(), sthing.size(), success, options);
162 success.assignIfRef(false);
163 return false;
166 Variant fb_unserialize(const char* str,
167 int len,
168 VRefParam success,
169 int64_t options) {
170 try {
171 if (options & k_FB_SERIALIZE_HACK_ARRAYS) {
172 auto res = HPHP::serialize
173 ::FBUnserializer<VariantControllerUsingHackArrays>
174 ::unserialize(folly::StringPiece(str, len));
175 success.assignIfRef(true);
176 return res;
177 } else {
178 auto res = HPHP::serialize::FBUnserializer<VariantController>
179 ::unserialize(folly::StringPiece(str, len));
180 success.assignIfRef(true);
181 return res;
183 } catch (const HPHP::serialize::UnserializeError&) {
184 success.assignIfRef(false);
185 return false;
189 ///////////////////////////////////////////////////////////////////////////////
192 * FB Compact Serialize
193 * ====================
195 * === Compatibility with fb_unserialize ===
197 * Check the high bit in the first byte of the serialized string.
198 * If it's set, the string is fb_compact_serialize'd, otherwise it's
199 * fb_serialize'd.
201 * === Format ===
203 * A value is serialized as a string <c> <data> where c is a byte (0xf0 | code),
204 * code being one of:
206 * 0 (INT16): data is 2 bytes, network order signed int16
207 * 1 (INT32): data is 4 bytes, network order signed int32
208 * 2 (INT64): data is 8 bytes, network order signed int64
209 * All of these represent an int64 value.
211 * 3 (NULL): no data, null value
213 * 4 (TRUE),
214 * 5 (FALSE): no data, boolean value
216 * 6 (DOUBLE): data is 8 bytes, double value
218 * 7 (STRING_0): no data
219 * 8 (STRING_1): one char of data
220 * 9 (STRING_N): followed by n as a serialized int64, followed by n characters
221 * All of these represent a string value.
223 * 10 (LIST_MAP): followed by serialized values until STOP is seen.
224 * Represents a map with numeric keys 0, 1, ..., n-1 (but see SKIP below).
226 * 11 (MAP): followed by serialized key/value pairs until STOP
227 * is seen. Represents a map with arbitrary int64 or string keys.
229 * 12 (STOP): no data
230 * Marks the end of a LIST or a MAP.
232 * 13 (SKIP): no data
233 * If seen as an entry in a LIST_MAP, the next index in the sequence will
234 * be skipped. E.g. array(0 => 'a', 1 => 'b', 3 => 'c') will be encoded as
235 * (LIST_MAP, 'a', 'b', SKIP, 'c') instead of
236 * (MAP, 0, 'a', 1, 'b', 3, 'c').
238 * 14 (VECTOR): followed by n serialized values until STOP is seen.
239 * Represents a vector of n values.
241 * In addition, if <c> & 0xf0 != 0xf0, most significant bits of <c> mean:
243 * - 0....... 7-bit unsigned int
244 * (NOTE: not used for the sole int value due to the compatibility
245 * requirement above)
246 * - 10...... + 6 more bytes, 54-bit unsigned int
247 * - 110..... + 1 more byte, 13-bit unsigned int
248 * - 1110.... + 2 more bytes, 20-bit unsigned int
250 * All of these represent an int64 value.
253 enum FbCompactSerializeCode {
254 FB_CS_INT16 = 0,
255 FB_CS_INT32 = 1,
256 FB_CS_INT64 = 2,
257 FB_CS_NULL = 3,
258 FB_CS_TRUE = 4,
259 FB_CS_FALSE = 5,
260 FB_CS_DOUBLE = 6,
261 FB_CS_STRING_0 = 7,
262 FB_CS_STRING_1 = 8,
263 FB_CS_STRING_N = 9,
264 FB_CS_LIST_MAP = 10,
265 FB_CS_MAP = 11,
266 FB_CS_STOP = 12,
267 FB_CS_SKIP = 13,
268 FB_CS_VECTOR = 14,
269 FB_CS_OBJ = 15,
270 FB_CS_MAX_CODE = 16,
273 static_assert(FB_CS_MAX_CODE <= '$',
274 "FB_CS_MAX_CODE must be less than ASCII '$' or serialize_memoize_param() "
275 "could produce strings that when used as array keys could collide with "
276 "keys it produces.");
278 // 1 byte: 0<7 bits>
279 const uint64_t kInt7Mask = 0x7f;
280 const uint64_t kInt7Prefix = 0x00;
282 // 2 bytes: 110<13 bits>
283 const uint64_t kInt13Mask = (1ULL << 13) - 1;
284 const uint64_t kInt13PrefixMsbMask = 0xe0;
285 const uint64_t kInt13PrefixMsb = 0xc0;
286 const uint64_t kInt13Prefix = kInt13PrefixMsb << (1 * 8);
288 // 3 bytes: 1110<20 bits>
289 const uint64_t kInt20Mask = (1ULL << 20) - 1;
290 const uint64_t kInt20PrefixMsbMask = 0xf0;
291 const uint64_t kInt20PrefixMsb = 0xe0;
292 const uint64_t kInt20Prefix = kInt20PrefixMsb << (2 * 8);
294 // 7 bytes: 10<54 bits>
295 const uint64_t kInt54Mask = (1ULL << 54) - 1;
296 const uint64_t kInt54PrefixMsbMask = 0xc0;
297 const uint64_t kInt54PrefixMsb = 0x80;
298 const uint64_t kInt54Prefix = kInt54PrefixMsb << (6 * 8);
300 // 1 byte: 1111<4 bits>
301 const uint64_t kCodeMask = 0x0f;
302 const uint64_t kCodePrefix = 0xf0;
305 static void fb_compact_serialize_code(StringBuffer& sb,
306 FbCompactSerializeCode code) {
307 assert(code == (code & kCodeMask));
308 uint8_t v = (kCodePrefix | code);
309 sb.append(reinterpret_cast<char*>(&v), 1);
312 static void fb_compact_serialize_int64(StringBuffer& sb, int64_t val) {
313 if (val >= 0 && (uint64_t)val <= kInt7Mask) {
314 uint8_t nval = val;
315 sb.append(reinterpret_cast<char*>(&nval), 1);
317 } else if (val >= 0 && (uint64_t)val <= kInt13Mask) {
318 uint16_t nval = htons(kInt13Prefix | val);
319 sb.append(reinterpret_cast<char*>(&nval), 2);
321 } else if (val == (int64_t)(int16_t)val) {
322 fb_compact_serialize_code(sb, FB_CS_INT16);
323 uint16_t nval = htons(val);
324 sb.append(reinterpret_cast<char*>(&nval), 2);
326 } else if (val >= 0 && (uint64_t)val <= kInt20Mask) {
327 uint32_t nval = htonl(kInt20Prefix | val);
328 // Skip most significant byte
329 sb.append(reinterpret_cast<char*>(&nval) + 1, 3);
331 } else if (val == (int64_t)(int32_t)val) {
332 fb_compact_serialize_code(sb, FB_CS_INT32);
333 uint32_t nval = htonl(val);
334 sb.append(reinterpret_cast<char*>(&nval), 4);
336 } else if (val >= 0 && (uint64_t)val <= kInt54Mask) {
337 uint64_t nval = htonll(kInt54Prefix | val);
338 // Skip most significant byte
339 sb.append(reinterpret_cast<char*>(&nval) + 1, 7);
341 } else {
342 fb_compact_serialize_code(sb, FB_CS_INT64);
343 uint64_t nval = htonll(val);
344 sb.append(reinterpret_cast<char*>(&nval), 8);
348 static void fb_compact_serialize_string(StringBuffer& sb, const String& str) {
349 int len = str.size();
350 if (len == 0) {
351 fb_compact_serialize_code(sb, FB_CS_STRING_0);
352 } else {
353 if (len == 1) {
354 fb_compact_serialize_code(sb, FB_CS_STRING_1);
355 } else {
356 fb_compact_serialize_code(sb, FB_CS_STRING_N);
357 fb_compact_serialize_int64(sb, len);
359 sb.append(str.data(), len);
363 static bool fb_compact_serialize_is_list(const Array& arr, int64_t& index_limit) {
364 index_limit = arr.size();
365 int64_t max_index = 0;
366 for (ArrayIter it(arr); it; ++it) {
367 Variant key = it.first();
368 if (!key.isNumeric()) {
369 return false;
371 int64_t index = key.toInt64();
372 if (index < 0) {
373 return false;
375 if (index > max_index) {
376 max_index = index;
380 if (max_index >= arr.size() * 2) {
381 // Might as well store it as a map
382 return false;
385 index_limit = max_index + 1;
386 return true;
389 static int fb_compact_serialize_variant(StringBuffer& sd,
390 const Variant& var, int depth, FBCompactSerializeBehavior behavior);
392 static void fb_compact_serialize_array_as_list_map(
393 StringBuffer& sb, const Array& arr, int64_t index_limit, int depth,
394 FBCompactSerializeBehavior behavior) {
395 fb_compact_serialize_code(sb, FB_CS_LIST_MAP);
396 for (int64_t i = 0; i < index_limit; ++i) {
397 if (arr.exists(i)) {
398 fb_compact_serialize_variant(sb, arr[i], depth + 1, behavior);
399 } else {
400 fb_compact_serialize_code(sb, FB_CS_SKIP);
403 fb_compact_serialize_code(sb, FB_CS_STOP);
406 static void fb_compact_serialize_vec(
407 StringBuffer& sb, const Array& arr, int depth,
408 FBCompactSerializeBehavior behavior) {
409 fb_compact_serialize_code(sb, FB_CS_LIST_MAP);
410 PackedArray::IterateV(
411 arr.get(),
412 [&](const TypedValue* v) {
413 fb_compact_serialize_variant(sb, tvAsCVarRef(v), depth + 1, behavior);
416 fb_compact_serialize_code(sb, FB_CS_STOP);
419 static void fb_compact_serialize_array_as_map(
420 StringBuffer& sb, const Array& arr, int depth,
421 FBCompactSerializeBehavior behavior) {
422 fb_compact_serialize_code(sb, FB_CS_MAP);
423 IterateKV(
424 arr.get(),
425 [&](const TypedValue* k, const TypedValue* v) {
426 if (tvIsString(k)) {
427 fb_compact_serialize_string(sb, StrNR{k->m_data.pstr});
428 } else {
429 assertx(k->m_type == KindOfInt64);
430 fb_compact_serialize_int64(sb, k->m_data.num);
432 fb_compact_serialize_variant(sb, tvAsCVarRef(v), depth + 1, behavior);
435 fb_compact_serialize_code(sb, FB_CS_STOP);
438 static void fb_compact_serialize_keyset(
439 StringBuffer& sb, const Array& arr, FBCompactSerializeBehavior behavior) {
440 fb_compact_serialize_code(sb, FB_CS_MAP);
441 SetArray::Iterate(
442 SetArray::asSet(arr.get()),
443 [&](const TypedValue* v) {
444 if (tvIsString(v)) {
445 fb_compact_serialize_string(sb, StrNR{v->m_data.pstr});
446 fb_compact_serialize_string(sb, StrNR{v->m_data.pstr});
447 } else {
448 assertx(v->m_type == KindOfInt64);
449 fb_compact_serialize_int64(sb, v->m_data.num);
450 fb_compact_serialize_int64(sb, v->m_data.num);
454 fb_compact_serialize_code(sb, FB_CS_STOP);
457 static int fb_compact_serialize_variant(StringBuffer& sb,
458 const Variant& var,
459 int depth,
460 FBCompactSerializeBehavior behavior) {
461 if (depth > 256) {
462 if (behavior == FBCompactSerializeBehavior::MemoizeParam) {
463 SystemLib::throwInvalidArgumentExceptionObject(
464 "Array depth exceeded");
467 return 1;
470 switch (var.getType()) {
471 case KindOfUninit:
472 case KindOfNull:
473 fb_compact_serialize_code(sb, FB_CS_NULL);
474 return 0;
476 case KindOfBoolean:
477 if (var.toInt64()) {
478 fb_compact_serialize_code(sb, FB_CS_TRUE);
479 } else {
480 fb_compact_serialize_code(sb, FB_CS_FALSE);
482 return 0;
484 case KindOfInt64:
485 fb_compact_serialize_int64(sb, var.toInt64());
486 return 0;
488 case KindOfDouble: {
489 fb_compact_serialize_code(sb, FB_CS_DOUBLE);
490 double d = var.toDouble();
491 sb.append(reinterpret_cast<char*>(&d), 8);
492 return 0;
495 case KindOfPersistentString:
496 case KindOfString:
497 fb_compact_serialize_string(sb, var.toString());
498 return 0;
500 case KindOfPersistentVec:
501 case KindOfVec: {
502 Array arr = var.toArray();
503 assert(arr->isVecArray());
504 fb_compact_serialize_vec(sb, std::move(arr), depth, behavior);
505 return 0;
508 case KindOfPersistentDict:
509 case KindOfDict: {
510 Array arr = var.toArray();
511 assert(arr->isDict());
512 fb_compact_serialize_array_as_map(sb, std::move(arr), depth, behavior);
513 return 0;
516 case KindOfPersistentKeyset:
517 case KindOfKeyset: {
518 Array arr = var.toArray();
519 assert(arr->isKeyset());
520 fb_compact_serialize_keyset(sb, std::move(arr), behavior);
521 return 0;
524 case KindOfPersistentArray:
525 case KindOfArray: {
526 Array arr = var.toArray();
527 assert(arr->isPHPArray());
528 int64_t index_limit;
529 if (fb_compact_serialize_is_list(arr, index_limit)) {
530 fb_compact_serialize_array_as_list_map(sb, std::move(arr), index_limit,
531 depth, behavior);
532 } else {
533 fb_compact_serialize_array_as_map(sb, std::move(arr), depth, behavior);
535 return 0;
538 case KindOfObject: {
539 if (behavior == FBCompactSerializeBehavior::MemoizeParam) {
540 Object obj = var.toObject();
542 if (obj->isCollection()) {
543 fb_compact_serialize_variant(sb, obj->toArray(), depth, behavior);
544 return 0;
547 if (!obj.instanceof(s_IMemoizeParam)) {
548 auto msg = folly::format(
549 "Cannot serialize object of type {} because it does not implement "
550 "HH\\IMemoizeParam",
551 obj->getClassName().asString()).str();
553 SystemLib::throwInvalidArgumentExceptionObject(msg);
556 // Marker that shows that this was an obj so it doesn't collide with
557 // strings
558 fb_compact_serialize_code(sb, FB_CS_OBJ);
560 Variant ser = obj->o_invoke_few_args(s_getInstanceKey, 0);
561 fb_compact_serialize_string(sb, ser.toString());
562 return 0;
565 // If not FBCompactSerializeBehavior::MemoizeParam fall-through to default
567 case KindOfResource:
568 case KindOfRef:
569 case KindOfClass:
570 fb_compact_serialize_code(sb, FB_CS_NULL);
571 raise_warning(
572 "fb_compact_serialize(): unable to serialize object/resource/ref/class"
574 break;
577 if (behavior == FBCompactSerializeBehavior::MemoizeParam) {
578 SystemLib::throwInvalidArgumentExceptionObject(
579 folly::format("Cannot Serialize unexpected type {}",
580 tname(var.getType())).str()
583 return 1;
586 String fb_compact_serialize(const Variant& thing,
587 FBCompactSerializeBehavior behavior) {
589 * If thing is a single int value [0, 127] normally we would serialize
590 * it as a single byte (7 bit unsigned int).
592 * However, we want highest bit of the first byte to always be set so
593 * that we can tell if the string is fb_serialize'd or fb_compact_serialize'd.
595 * So we force to serialize it as 13 bit unsigned int instead.
597 if (thing.getType() == KindOfInt64) {
598 int64_t val = thing.toInt64();
599 if (val >= 0 && (uint64_t)val <= kInt7Mask) {
600 String s(2, ReserveString);
601 *(uint16_t*)(s.mutableData()) = (uint16_t)htons(kInt13Prefix | val);
602 s.setSize(2);
603 return s;
607 StringBuffer sb;
608 if (fb_compact_serialize_variant(sb, thing, 0, behavior)) {
609 return String();
612 return sb.detach();
615 Variant HHVM_FUNCTION(fb_compact_serialize, const Variant& thing) {
616 return fb_compact_serialize(thing, FBCompactSerializeBehavior::Base);
/*
 * Check if there are enough bytes left in the buffer; if not, make the
 * enclosing function return FB_UNSERIALIZE_UNEXPECTED_END.
 *
 * `bytes` may be a 64-bit length decoded from untrusted input, so the
 * comparison is done in 64 bits without truncation: a negative count, or one
 * larger than what remains between `pos` and `num`, is rejected.
 */
#define CHECK_ENOUGH(bytes, pos, num) do {                              \
    const int64_t check_needed_ = (int64_t)(bytes);                     \
    const int64_t check_left_ = (int64_t)(num) - (int64_t)(pos);        \
    if (check_needed_ < 0 || check_needed_ > check_left_) {             \
      return FB_UNSERIALIZE_UNEXPECTED_END;                             \
    }                                                                   \
  } while (0)
627 int fb_compact_unserialize_int64_from_buffer(
628 int64_t& out, const char* buf, int n, int& p) {
630 CHECK_ENOUGH(1, p, n);
631 uint64_t first = (unsigned char)buf[p];
632 if ((first & ~kInt7Mask) == kInt7Prefix) {
633 p += 1;
634 out = first & kInt7Mask;
636 } else if ((first & kInt13PrefixMsbMask) == kInt13PrefixMsb) {
637 CHECK_ENOUGH(2, p, n);
638 uint16_t val = (uint16_t)ntohs(*reinterpret_cast<const uint16_t*>(buf + p));
639 p += 2;
640 out = val & kInt13Mask;
642 } else if (first == (kCodePrefix | FB_CS_INT16)) {
643 p += 1;
644 CHECK_ENOUGH(2, p, n);
645 int16_t val = (int16_t)ntohs(*reinterpret_cast<const int16_t*>(buf + p));
646 p += 2;
647 out = val;
649 } else if ((first & kInt20PrefixMsbMask) == kInt20PrefixMsb) {
650 CHECK_ENOUGH(3, p, n);
651 uint32_t b = 0;
652 memcpy(&b, buf + p, 3);
653 uint32_t val = ntohl(b);
654 p += 3;
655 out = (val >> 8) & kInt20Mask;
657 } else if (first == (kCodePrefix | FB_CS_INT32)) {
658 p += 1;
659 CHECK_ENOUGH(4, p, n);
660 int32_t val = (int32_t)ntohl(*reinterpret_cast<const int32_t*>(buf + p));
661 p += 4;
662 out = val;
664 } else if ((first & kInt54PrefixMsbMask) == kInt54PrefixMsb) {
665 CHECK_ENOUGH(7, p, n);
666 uint64_t b = 0;
667 memcpy(&b, buf + p, 7);
668 uint64_t val = ntohll(b);
669 p += 7;
670 out = (val >> 8) & kInt54Mask;
672 } else if (first == (kCodePrefix | FB_CS_INT64)) {
673 p += 1;
674 CHECK_ENOUGH(8, p, n);
675 int64_t val = (int64_t)ntohll(*reinterpret_cast<const int64_t*>(buf + p));
676 p += 8;
677 out = val;
679 } else {
680 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE;
683 return 0;
686 const StaticString s_empty("");
688 int fb_compact_unserialize_from_buffer(
689 Variant& out, const char* buf, int n, int& p) {
691 CHECK_ENOUGH(1, p, n);
692 int code = (unsigned char)buf[p];
693 if ((code & ~kCodeMask) != kCodePrefix ||
694 (code & kCodeMask) == FB_CS_INT16 ||
695 (code & kCodeMask) == FB_CS_INT32 ||
696 (code & kCodeMask) == FB_CS_INT64) {
698 int64_t val;
699 int err = fb_compact_unserialize_int64_from_buffer(val, buf, n, p);
700 if (err) {
701 return err;
703 out = (int64_t)val;
704 return 0;
706 p += 1;
707 code &= kCodeMask;
708 switch (code) {
709 case FB_CS_NULL:
710 out = uninit_null();
711 break;
713 case FB_CS_TRUE:
714 out = true;
715 break;
717 case FB_CS_FALSE:
718 out = false;
719 break;
721 case FB_CS_DOUBLE:
723 CHECK_ENOUGH(8, p, n);
724 double d = *reinterpret_cast<const double*>(buf + p);
725 p += 8;
726 out = d;
727 break;
730 case FB_CS_STRING_0:
732 out = s_empty;
733 break;
736 case FB_CS_STRING_1:
737 case FB_CS_STRING_N:
739 int64_t len = 1;
740 if (code == FB_CS_STRING_N) {
741 int err = fb_compact_unserialize_int64_from_buffer(len, buf, n, p);
742 if (err) {
743 return err;
747 CHECK_ENOUGH(len, p, n);
748 out = Variant::attach(StringData::Make(buf + p, len, CopyString));
749 p += len;
750 break;
753 case FB_CS_LIST_MAP:
754 case FB_CS_VECTOR:
756 Array arr = Array::Create();
757 int64_t i = 0;
758 while (p < n && buf[p] != (char)(kCodePrefix | FB_CS_STOP)) {
759 if (buf[p] == (char)(kCodePrefix | FB_CS_SKIP)) {
760 ++i;
761 ++p;
762 } else {
763 Variant value;
764 int err = fb_compact_unserialize_from_buffer(value, buf, n, p);
765 if (err) {
766 return err;
768 arr.set(i++, value);
772 // Consume STOP
773 CHECK_ENOUGH(1, p, n);
774 p += 1;
776 out = arr;
777 break;
780 case FB_CS_MAP:
782 Array arr = Array::Create();
783 while (p < n && buf[p] != (char)(kCodePrefix | FB_CS_STOP)) {
784 Variant key;
785 int err = fb_compact_unserialize_from_buffer(key, buf, n, p);
786 if (err) {
787 return err;
789 Variant value;
790 err = fb_compact_unserialize_from_buffer(value, buf, n, p);
791 if (err) {
792 return err;
794 if (key.getType() == KindOfInt64) {
795 arr.set(key.toInt64(), value);
796 } else if (key.getType() == KindOfString ||
797 key.getType() == KindOfPersistentString) {
798 arr.set(key, value);
799 } else {
800 return FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE;
804 // Consume STOP
805 CHECK_ENOUGH(1, p, n);
806 p += 1;
808 out = arr;
809 break;
812 default:
813 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE;
816 return 0;
819 Variant fb_compact_unserialize(const char* str, int len,
820 VRefParam success,
821 VRefParam errcode /* = uninit_variant */) {
823 Variant ret;
824 int p = 0;
825 int err = fb_compact_unserialize_from_buffer(ret, str, len, p);
826 if (err) {
827 success.assignIfRef(false);
828 errcode.assignIfRef(err);
829 return false;
831 success.assignIfRef(true);
832 errcode.assignIfRef(init_null());
833 return ret;
836 Variant HHVM_FUNCTION(fb_compact_unserialize,
837 const Variant& thing, VRefParam success,
838 VRefParam errcode /* = uninit_variant */) {
839 if (!thing.isString()) {
840 success.assignIfRef(false);
841 errcode.assignIfRef(FB_UNSERIALIZE_NONSTRING_VALUE);
842 return false;
845 String s = thing.toString();
846 return fb_compact_unserialize(s.data(), s.size(), ref(success),
847 ref(errcode));
850 ///////////////////////////////////////////////////////////////////////////////
852 bool HHVM_FUNCTION(fb_utf8ize, VRefParam input) {
853 String s = input.toString();
854 const char* const srcBuf = s.data();
855 int32_t srcLenBytes = s.size();
857 if (s.size() < 0 || s.size() > INT_MAX) {
858 return false; // Too long.
861 // Preflight to avoid allocation if the entire input is valid.
862 int32_t srcPosBytes;
863 for (srcPosBytes = 0; srcPosBytes < srcLenBytes; /* U8_NEXT increments */) {
864 // This is lame, but gcc doesn't optimize U8_NEXT very well
865 if (srcBuf[srcPosBytes] > 0 && srcBuf[srcPosBytes] <= 0x7f) {
866 srcPosBytes++; // U8_NEXT would increment this
867 continue;
869 UChar32 curCodePoint;
870 // U8_NEXT() always advances srcPosBytes; save in case curCodePoint invalid
871 int32_t savedSrcPosBytes = srcPosBytes;
872 U8_NEXT(srcBuf, srcPosBytes, srcLenBytes, curCodePoint);
873 if (curCodePoint <= 0) {
874 // curCodePoint invalid; back up so we'll fix it in the loop below.
875 srcPosBytes = savedSrcPosBytes;
876 break;
880 if (srcPosBytes == srcLenBytes) {
881 // it's all valid
882 return true;
885 // There are invalid bytes. Allocate memory, then copy the input, replacing
886 // invalid sequences with either the substitution character or nothing,
887 // depending on the value of RuntimeOption::Utf8izeReplace.
889 // Worst case, every remaining byte is invalid, taking a 3-byte substitution.
890 int32_t bytesRemaining = srcLenBytes - srcPosBytes;
891 uint64_t dstMaxLenBytes = srcPosBytes + (RuntimeOption::Utf8izeReplace ?
892 bytesRemaining * U8_LENGTH(SUBSTITUTION_CHARACTER) :
893 bytesRemaining);
894 if (dstMaxLenBytes > INT_MAX) {
895 return false; // Too long.
897 String dstStr(dstMaxLenBytes, ReserveString);
898 char *dstBuf = dstStr.mutableData();
900 // Copy valid bytes found so far as one solid block.
901 memcpy(dstBuf, srcBuf, srcPosBytes);
903 // Iterate through the remaining bytes.
904 int32_t dstPosBytes = srcPosBytes; // already copied srcPosBytes
905 for (/* already init'd */; srcPosBytes < srcLenBytes; /* see U8_NEXT */) {
906 UChar32 curCodePoint;
907 // This is lame, but gcc doesn't optimize U8_NEXT very well
908 if (srcBuf[srcPosBytes] > 0 && srcBuf[srcPosBytes] <= 0x7f) {
909 curCodePoint = srcBuf[srcPosBytes++]; // U8_NEXT would increment
910 } else {
911 U8_NEXT(srcBuf, srcPosBytes, srcLenBytes, curCodePoint);
913 if (curCodePoint <= 0) {
914 // Invalid UTF-8 sequence.
915 // N.B. We consider a null byte an invalid sequence.
916 if (!RuntimeOption::Utf8izeReplace) {
917 continue; // Omit invalid sequence
919 curCodePoint = SUBSTITUTION_CHARACTER; // Replace invalid sequences
921 // We know that resultBuffer > total possible length.
922 U8_APPEND_UNSAFE(dstBuf, dstPosBytes, curCodePoint);
924 assert(dstPosBytes <= dstMaxLenBytes);
925 input.assignIfRef(dstStr.shrink(dstPosBytes));
926 return true;
930 * Private utf8_strlen implementation.
932 * Returns count of code points in input, substituting 1 code point per invalid
933 * sequence.
935 * deprecated=true: instead return byte count on invalid UTF-8 sequence.
937 static int fb_utf8_strlen_impl(const String& input, bool deprecated) {
938 // Count, don't modify.
939 int32_t sourceLength = input.size();
940 const char* const sourceBuffer = input.data();
941 int64_t num_code_points = 0;
943 for (int32_t sourceOffset = 0; sourceOffset < sourceLength; ) {
944 UChar32 sourceCodePoint;
945 // U8_NEXT() is guaranteed to advance sourceOffset by 1-4 each time it's
946 // invoked.
947 U8_NEXT(sourceBuffer, sourceOffset, sourceLength, sourceCodePoint);
948 if (deprecated && sourceCodePoint < 0) {
949 return sourceLength; // return byte count on invalid sequence
951 num_code_points++;
953 return num_code_points;
956 int64_t HHVM_FUNCTION(fb_utf8_strlen, const String& input) {
957 return fb_utf8_strlen_impl(input, /* deprecated */ false);
960 int64_t HHVM_FUNCTION(fb_utf8_strlen_deprecated, const String& input) {
961 return fb_utf8_strlen_impl(input, /* deprecated */ true);
965 * Private helper; requires non-negative firstCodePoint and desiredCodePoints.
967 static String fb_utf8_substr_simple(const String& str,
968 int32_t firstCodePoint,
969 int32_t numDesiredCodePoints) {
970 const char* const srcBuf = str.data();
971 int32_t srcLenBytes = str.size(); // May truncate; checked before use below.
973 assert(firstCodePoint >= 0); // Wrapper fixes up negative starting positions.
974 assert(numDesiredCodePoints > 0); // Wrapper fixes up negative/zero length.
975 if (str.size() <= 0 ||
976 str.size() > INT_MAX ||
977 firstCodePoint >= srcLenBytes) {
978 return empty_string();
981 // Cannot be more code points than bytes in input. This typically reduces
982 // the INT_MAX default value to something more reasonable.
983 numDesiredCodePoints = std::min(numDesiredCodePoints,
984 srcLenBytes - firstCodePoint);
986 // Pre-allocate the result.
987 // the worst case can come from one of two sources:
988 // - every code point could be the substitution char (3 bytes)
989 // giving us numDesiredCodePoints * 3
990 // - every code point could be 4 bytes long, giving us
991 // numDesiredCodePoints * 4 - but capped by the length of the input
992 uint64_t dstMaxLenBytes =
993 std::min((uint64_t)numDesiredCodePoints * 4,
994 (uint64_t)srcLenBytes - firstCodePoint);
995 dstMaxLenBytes = std::max(dstMaxLenBytes,
996 (uint64_t)numDesiredCodePoints *
997 U8_LENGTH(SUBSTITUTION_CHARACTER));
998 if (dstMaxLenBytes > INT_MAX) {
999 return empty_string(); // Too long.
1001 String dstStr(dstMaxLenBytes, ReserveString);
1002 char* dstBuf = dstStr.mutableData();
1003 int32_t dstPosBytes = 0;
1005 // Iterate through src's codepoints; srcPosBytes is incremented by U8_NEXT.
1006 for (int32_t srcPosBytes = 0, srcPosCodePoints = 0;
1007 srcPosBytes < srcLenBytes && // more available
1008 srcPosCodePoints < firstCodePoint + numDesiredCodePoints; // want more
1009 srcPosCodePoints++) {
1011 // U8_NEXT() advances sourceBytePos by 1-4 each time it's invoked.
1012 UChar32 curCodePoint;
1013 U8_NEXT(srcBuf, srcPosBytes, srcLenBytes, curCodePoint);
1015 if (srcPosCodePoints >= firstCodePoint) {
1016 // Copy this code point into the result.
1017 if (curCodePoint < 0) {
1018 curCodePoint = SUBSTITUTION_CHARACTER; // replace invalid sequences
1020 // We know that resultBuffer > total possible length.
1021 // U8_APPEND_UNSAFE updates dstPosBytes.
1022 U8_APPEND_UNSAFE(dstBuf, dstPosBytes, curCodePoint);
1026 assert(dstPosBytes <= dstMaxLenBytes);
1027 if (dstPosBytes > 0) {
1028 dstStr.shrink(dstPosBytes);
1029 return dstStr;
1031 return empty_string();
1034 String HHVM_FUNCTION(fb_utf8_substr, const String& str, int64_t start,
1035 int64_t length /* = INT_MAX */) {
1036 if (length > INT_MAX) {
1037 length = INT_MAX;
1039 // For negative start or length, calculate start and length values
1040 // based on total code points.
1041 if (start < 0 || length < 0) {
1042 // Get number of code points assuming we substitute invalid sequences.
1043 Variant utf8StrlenResult = HHVM_FN(fb_utf8_strlen)(str);
1044 int32_t sourceNumCodePoints = utf8StrlenResult.toInt32();
1046 if (start < 0) {
1047 // Negative means first character is start'th code point from end.
1048 // e.g., -1 means start with the last code point.
1049 start = sourceNumCodePoints + start; // adding negative start
1051 if (length < 0) {
1052 // Negative means omit last abs(length) code points.
1053 length = sourceNumCodePoints - start + length; // adding negative length
1056 if (start < 0 || length <= 0) {
1057 return empty_string(); // Empty result
1060 return fb_utf8_substr_simple(str, start, length);
1063 ///////////////////////////////////////////////////////////////////////////////
1065 bool HHVM_FUNCTION(fb_intercept, const String& name, const Variant& handler,
1066 const Variant& data /* = uninit_variant */) {
1067 return register_intercept(name, handler, data);
1070 bool is_dangerous_varenv_function(const StringData* name) {
1071 auto const f = Unit::lookupFunc(name);
1072 // Functions can which can access the caller's frame are always builtin, so if
1073 // its not already defined, we know it can't be one.
1074 return f && f->accessesCallerFrame();
1077 bool HHVM_FUNCTION(fb_rename_function, const String& orig_func_name,
1078 const String& new_func_name) {
1079 if (orig_func_name.empty() || new_func_name.empty() ||
1080 orig_func_name.get()->isame(new_func_name.get())) {
1081 throw_invalid_argument("unable to rename %s", orig_func_name.data());
1082 return false;
1085 if (!function_exists(orig_func_name)) {
1086 raise_warning("fb_rename_function(%s, %s) failed: %s does not exist!",
1087 orig_func_name.data(), new_func_name.data(),
1088 orig_func_name.data());
1089 return false;
1092 if (is_dangerous_varenv_function(orig_func_name.get())) {
1093 raise_warning(
1094 "fb_rename_function(%s, %s) failed: rename of functions that "
1095 "affect variable environments is not allowed",
1096 orig_func_name.data(), new_func_name.data());
1097 return false;
1100 if (function_exists(new_func_name)) {
1101 if (new_func_name.data()[0] != '1') {
1102 raise_warning("fb_rename_function(%s, %s) failed: %s already exists!",
1103 orig_func_name.data(), new_func_name.data(),
1104 new_func_name.data());
1105 return false;
1109 rename_function(orig_func_name, new_func_name);
1110 return true;
1113 ///////////////////////////////////////////////////////////////////////////////
1114 // call_user_func extensions
1115 // Linked in via fb.json.idl for now - Need OptFunc solution...
1117 Array HHVM_FUNCTION(fb_call_user_func_safe,
1118 const Variant& function,
1119 const Array& argv) {
1120 return HHVM_FN(fb_call_user_func_array_safe)(function, argv);
1123 Variant HHVM_FUNCTION(fb_call_user_func_safe_return,
1124 const Variant& function,
1125 const Variant& def,
1126 const Array& argv) {
1127 if (is_callable(function)) {
1128 return vm_call_user_func(function, argv);
1130 return def;
1133 Array HHVM_FUNCTION(fb_call_user_func_array_safe,
1134 const Variant& function,
1135 const Array& params) {
1136 if (is_callable(function)) {
1137 return make_packed_array(true, vm_call_user_func(function, params));
1139 return make_packed_array(false, uninit_variant);
1142 ///////////////////////////////////////////////////////////////////////////////
1144 Variant HHVM_FUNCTION(fb_get_code_coverage, bool flush) {
1145 ThreadInfo *ti = ThreadInfo::s_threadInfo.getNoCheck();
1146 if (ti->m_reqInjectionData.getCoverage()) {
1147 Array ret = ti->m_coverage->Report();
1148 if (flush) {
1149 ti->m_coverage->Reset();
1151 return ret;
1153 return false;
1156 void HHVM_FUNCTION(fb_enable_code_coverage) {
1157 ThreadInfo *ti = ThreadInfo::s_threadInfo.getNoCheck();
1158 ti->m_coverage->Reset();
1159 ti->m_reqInjectionData.setCoverage(true);;
1160 if (g_context->isNested()) {
1161 raise_notice("Calling fb_enable_code_coverage from a nested "
1162 "VM instance may cause unpredicable results");
1164 throw VMSwitchModeBuiltin();
1167 Variant HHVM_FUNCTION(fb_disable_code_coverage) {
1168 ThreadInfo *ti = ThreadInfo::s_threadInfo.getNoCheck();
1169 ti->m_reqInjectionData.setCoverage(false);
1170 Array ret = ti->m_coverage->Report();
1171 ti->m_coverage->Reset();
1172 return ret;
1175 ///////////////////////////////////////////////////////////////////////////////
1177 bool HHVM_FUNCTION(fb_output_compression, bool new_value) {
1178 Transport *transport = g_context->getTransport();
1179 if (transport) {
1180 bool rv = transport->isCompressionEnabled();
1181 if (new_value) {
1182 transport->enableCompression();
1183 } else {
1184 transport->disableCompression();
1186 return rv;
1188 return false;
1191 void HHVM_FUNCTION(fb_set_exit_callback, const Variant& function) {
1192 g_context->setExitCallback(function);
1195 const StaticString
1196 s_flush_stats("flush_stats"),
1197 s_chunk_stats("chunk_stats"),
1198 s_total("total"),
1199 s_sent("sent"),
1200 s_time("time");
1202 int64_t HHVM_FUNCTION(fb_get_last_flush_size) {
1203 Transport *transport = g_context->getTransport();
1204 return transport ? transport->getLastChunkSentSize() : 0;
1207 extern Array stat_impl(struct stat*); // ext_file.cpp
1209 template<class Function>
1210 static Variant do_lazy_stat(Function dostat, const String& filename) {
1211 struct stat sb;
1212 if (dostat(File::TranslatePathWithFileCache(filename).c_str(), &sb)) {
1213 Logger::Verbose("%s/%d: %s", __FUNCTION__, __LINE__,
1214 folly::errnoStr(errno).c_str());
1215 return false;
1217 return stat_impl(&sb);
1220 Variant HHVM_FUNCTION(fb_lazy_lstat, const String& filename) {
1221 if (!FileUtil::checkPathAndWarn(filename, __FUNCTION__ + 2, 1)) {
1222 return false;
1224 return do_lazy_stat(StatCache::lstat, filename);
1227 Variant HHVM_FUNCTION(fb_lazy_realpath, const String& filename) {
1228 if (!FileUtil::checkPathAndWarn(filename, __FUNCTION__ + 2, 1)) {
1229 return false;
1232 return StatCache::realpath(filename.c_str());
1235 ///////////////////////////////////////////////////////////////////////////////
1237 EXTERNALLY_VISIBLE
1238 void const_load() {
1239 // TODO(8117903): Unused; remove after updating www side.
1242 ///////////////////////////////////////////////////////////////////////////////
1244 struct FBExtension : Extension {
1245 FBExtension(): Extension("fb", "1.0.0") {}
1247 void moduleInit() override {
1248 HHVM_RC_BOOL_SAME(HHVM_FACEBOOK);
1249 HHVM_RC_INT_SAME(FB_UNSERIALIZE_NONSTRING_VALUE);
1250 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_END);
1251 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE);
1252 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE);
1254 HHVM_RC_INT(FB_SERIALIZE_HACK_ARRAYS, k_FB_SERIALIZE_HACK_ARRAYS);
1256 HHVM_FE(fb_serialize);
1257 HHVM_FE(fb_unserialize);
1258 HHVM_FE(fb_compact_serialize);
1259 HHVM_FE(fb_compact_unserialize);
1260 HHVM_FE(fb_utf8ize);
1261 HHVM_FE(fb_utf8_strlen);
1262 HHVM_FE(fb_utf8_strlen_deprecated);
1263 HHVM_FE(fb_utf8_substr);
1264 HHVM_FE(fb_intercept);
1265 HHVM_FE(fb_rename_function);
1266 HHVM_FE(fb_get_code_coverage);
1267 HHVM_FE(fb_enable_code_coverage);
1268 HHVM_FE(fb_disable_code_coverage);
1269 HHVM_FE(fb_output_compression);
1270 HHVM_FE(fb_set_exit_callback);
1271 HHVM_FE(fb_get_last_flush_size);
1272 HHVM_FE(fb_lazy_lstat);
1273 HHVM_FE(fb_lazy_realpath);
1274 HHVM_FE(fb_call_user_func_safe);
1275 HHVM_FE(fb_call_user_func_safe_return);
1276 HHVM_FE(fb_call_user_func_array_safe);
1278 loadSystemlib();
1280 } s_fb_extension;
1282 ///////////////////////////////////////////////////////////////////////////////