2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2016 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/fb/ext_fb.h"
21 #include <unicode/uchar.h>
22 #include <unicode/utf8.h>
28 #include <folly/String.h>
29 #include <folly/portability/Sockets.h>
31 #include "hphp/util/htonll.h"
32 #include "hphp/util/logger.h"
33 #include "hphp/runtime/base/array-init.h"
34 #include "hphp/runtime/base/builtin-functions.h"
35 #include "hphp/runtime/base/code-coverage.h"
36 #include "hphp/runtime/base/externals.h"
37 #include "hphp/runtime/base/file.h"
38 #include "hphp/runtime/base/file-util.h"
39 #include "hphp/runtime/base/plain-file.h"
40 #include "hphp/runtime/base/unit-cache.h"
41 #include "hphp/runtime/base/intercept.h"
42 #include "hphp/runtime/base/runtime-option.h"
43 #include "hphp/runtime/base/stat-cache.h"
44 #include "hphp/runtime/base/string-buffer.h"
45 #include "hphp/runtime/base/string-util.h"
46 #include "hphp/runtime/base/thread-info.h"
47 #include "hphp/runtime/ext/std/ext_std_function.h"
48 #include "hphp/runtime/ext/fb/FBSerialize/FBSerialize.h"
49 #include "hphp/runtime/ext/fb/VariantController.h"
50 #include "hphp/runtime/vm/unwind.h"
52 #include "hphp/parser/parser.h"
56 // fb_serialize options
57 const int64_t k_FB_SERIALIZE_HACK_ARRAYS
= 1<<1;
59 ///////////////////////////////////////////////////////////////////////////////
61 static const UChar32 SUBSTITUTION_CHARACTER
= 0xFFFD;
63 #define FB_UNSERIALIZE_NONSTRING_VALUE 0x0001
64 #define FB_UNSERIALIZE_UNEXPECTED_END 0x0002
65 #define FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE 0x0003
66 #define FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE 0x0004
69 # define HHVM_FACEBOOK true
71 # define HHVM_FACEBOOK false
74 ///////////////////////////////////////////////////////////////////////////////
78 s_IMemoizeParam("HH\\IMemoizeParam"),
79 s_getInstanceKey("getInstanceKey");
81 ///////////////////////////////////////////////////////////////////////////////
83 /* enum of thrift types */
/*
 * Return the smallest signed integer width, in bytes (1, 2, 4 or 8), that can
 * store the value of x.
 *
 * The argument is parenthesized inside every cast so that a compound
 * expression such as INT_SIZE(a + b) is narrowed as a whole; without the
 * parentheses the cast would bind to the first operand only and the
 * comparison would always succeed.
 *
 * x is evaluated more than once: do not pass expressions with side effects.
 */
#define INT_SIZE(x) (((x) == ((int8_t)(x))) ? 1 :  \
                     ((x) == ((int16_t)(x))) ? 2 : \
                     ((x) == ((int32_t)(x))) ? 4 : 8)

/*
 * Return the smallest (supported) unsigned length width, in bytes (1 or 4),
 * that can store the value of x.  The argument is parenthesized inside the
 * casts for the same reason as in INT_SIZE, and is evaluated twice.
 */
#define LEN_SIZE(x) ((((unsigned)(x)) == ((uint8_t)(x))) ? 1 : 4)
112 Variant
HHVM_FUNCTION(fb_serialize
, const Variant
& thing
, int64_t options
) {
114 if (options
& k_FB_SERIALIZE_HACK_ARRAYS
) {
115 size_t len
= HPHP::serialize
116 ::FBSerializer
<VariantControllerUsingHackArrays
>
117 ::serializedSize(thing
);
118 String
s(len
, ReserveString
);
120 ::FBSerializer
<VariantControllerUsingHackArrays
>
121 ::serialize(thing
, s
.mutableData());
126 HPHP::serialize::FBSerializer
<VariantController
>::serializedSize(thing
);
127 String
s(len
, ReserveString
);
128 HPHP::serialize::FBSerializer
<VariantController
>::serialize(
129 thing
, s
.mutableData());
133 } catch (const HPHP::serialize::KeysetSerializeError
&) {
134 SystemLib::throwInvalidArgumentExceptionObject(
135 "Keysets cannot be serialized with fb_serialize"
137 } catch (const HPHP::serialize::HackArraySerializeError
&) {
138 SystemLib::throwInvalidArgumentExceptionObject(
139 "Serializing Hack arrays requires the FB_SERIALIZE_HACK_ARRAYS "
140 "option to be provided"
142 } catch (const HPHP::serialize::SerializeError
&) {
147 Variant
HHVM_FUNCTION(fb_unserialize
,
148 const Variant
& thing
,
151 if (thing
.isString()) {
152 String sthing
= thing
.toString();
154 if (sthing
.size() && (sthing
.data()[0] & 0x80)) {
155 return fb_compact_unserialize(sthing
.data(), sthing
.size(),
158 return fb_unserialize(sthing
.data(), sthing
.size(), success
, options
);
162 success
.assignIfRef(false);
166 Variant
fb_unserialize(const char* str
,
171 if (options
& k_FB_SERIALIZE_HACK_ARRAYS
) {
172 auto res
= HPHP::serialize
173 ::FBUnserializer
<VariantControllerUsingHackArrays
>
174 ::unserialize(folly::StringPiece(str
, len
));
175 success
.assignIfRef(true);
178 auto res
= HPHP::serialize::FBUnserializer
<VariantController
>
179 ::unserialize(folly::StringPiece(str
, len
));
180 success
.assignIfRef(true);
183 } catch (const HPHP::serialize::UnserializeError
&) {
184 success
.assignIfRef(false);
189 ///////////////////////////////////////////////////////////////////////////////
192 * FB Compact Serialize
193 * ====================
195 * === Compatibility with fb_unserialize ===
197 * Check the high bit in the first byte of the serialized string.
198 * If it's set, the string is fb_compact_serialize'd, otherwise it's
203 * A value is serialized as a string <c> <data> where c is a byte (0xf0 | code),
206 * 0 (INT16): data is 2 bytes, network order signed int16
207 * 1 (INT32): data is 4 bytes, network order signed int32
208 * 2 (INT64): data is 8 bytes, network order signed int64
209 * All of these represent an int64 value.
211 * 3 (NULL): no data, null value
214 * 5 (FALSE): no data, boolean value
216 * 6 (DOUBLE): data is 8 bytes, double value
218 * 7 (STRING_0): no data
219 * 8 (STRING_1): one char of data
220 * 9 (STRING_N): followed by n as a serialized int64, followed by n characters
221 * All of these represent a string value.
223 * 10 (LIST_MAP): followed by serialized values until STOP is seen.
224 * Represents a map with numeric keys 0, 1, ..., n-1 (but see SKIP below).
226 * 11 (MAP): followed by serialized key/value pairs until STOP
227 * is seen. Represents a map with arbitrary int64 or string keys.
230 * Marks the end of a LIST or a MAP.
233 * If seen as an entry in a LIST_MAP, the next index in the sequence will
 *    be skipped. E.g. array(0 => 'a', 1 => 'b', 3 => 'c') will be encoded as
235 * (LIST_MAP, 'a', 'b', SKIP, 'c') instead of
236 * (MAP, 0, 'a', 1, 'b', 3, 'c').
238 * 14 (VECTOR): followed by n serialized values until STOP is seen.
239 * Represents a vector of n values.
 * In addition, if (<c> & 0xf0) != 0xf0, most significant bits of <c> mean:
243 * - 0....... 7-bit unsigned int
244 * (NOTE: not used for the sole int value due to the compatibility
246 * - 10...... + 6 more bytes, 54-bit unsigned int
247 * - 110..... + 1 more byte, 13-bit unsigned int
248 * - 1110.... + 2 more bytes, 20-bit unsigned int
250 * All of these represent an int64 value.
253 enum FbCompactSerializeCode
{
273 static_assert(FB_CS_MAX_CODE
<= '$',
274 "FB_CS_MAX_CODE must be less than ASCII '$' or serialize_memoize_param() "
275 "could produce strings that when used as array keys could collide with "
276 "keys it produces.");
// Layout constants for the variable-width unsigned integer encodings.
// For each width: *Mask selects the payload bits of the decoded value,
// *PrefixMsbMask / *PrefixMsb identify the encoding from the first byte, and
// *Prefix is the tag already shifted to the top byte of the encoded value.

// 1 byte: 0<7 bits>
const uint64_t kInt7Mask = (1ULL << 7) - 1;
const uint64_t kInt7Prefix = 0;

// 2 bytes: 110<13 bits>
const uint64_t kInt13Mask = (1ULL << 13) - 1;
const uint64_t kInt13PrefixMsbMask = 0xe0;
const uint64_t kInt13PrefixMsb = 0xc0;
const uint64_t kInt13Prefix = kInt13PrefixMsb << 8;

// 3 bytes: 1110<20 bits>
const uint64_t kInt20Mask = (1ULL << 20) - 1;
const uint64_t kInt20PrefixMsbMask = 0xf0;
const uint64_t kInt20PrefixMsb = 0xe0;
const uint64_t kInt20Prefix = kInt20PrefixMsb << 16;

// 7 bytes: 10<54 bits>
const uint64_t kInt54Mask = (1ULL << 54) - 1;
const uint64_t kInt54PrefixMsbMask = 0xc0;
const uint64_t kInt54PrefixMsb = 0x80;
const uint64_t kInt54Prefix = kInt54PrefixMsb << 48;

// 1 byte: 1111<4 bits> -- a type code rather than an integer payload
const uint64_t kCodeMask = (1ULL << 4) - 1;
const uint64_t kCodePrefix = 0xf0;
305 static void fb_compact_serialize_code(StringBuffer
& sb
,
306 FbCompactSerializeCode code
) {
307 assert(code
== (code
& kCodeMask
));
308 uint8_t v
= (kCodePrefix
| code
);
309 sb
.append(reinterpret_cast<char*>(&v
), 1);
312 static void fb_compact_serialize_int64(StringBuffer
& sb
, int64_t val
) {
313 if (val
>= 0 && (uint64_t)val
<= kInt7Mask
) {
315 sb
.append(reinterpret_cast<char*>(&nval
), 1);
317 } else if (val
>= 0 && (uint64_t)val
<= kInt13Mask
) {
318 uint16_t nval
= htons(kInt13Prefix
| val
);
319 sb
.append(reinterpret_cast<char*>(&nval
), 2);
321 } else if (val
== (int64_t)(int16_t)val
) {
322 fb_compact_serialize_code(sb
, FB_CS_INT16
);
323 uint16_t nval
= htons(val
);
324 sb
.append(reinterpret_cast<char*>(&nval
), 2);
326 } else if (val
>= 0 && (uint64_t)val
<= kInt20Mask
) {
327 uint32_t nval
= htonl(kInt20Prefix
| val
);
328 // Skip most significant byte
329 sb
.append(reinterpret_cast<char*>(&nval
) + 1, 3);
331 } else if (val
== (int64_t)(int32_t)val
) {
332 fb_compact_serialize_code(sb
, FB_CS_INT32
);
333 uint32_t nval
= htonl(val
);
334 sb
.append(reinterpret_cast<char*>(&nval
), 4);
336 } else if (val
>= 0 && (uint64_t)val
<= kInt54Mask
) {
337 uint64_t nval
= htonll(kInt54Prefix
| val
);
338 // Skip most significant byte
339 sb
.append(reinterpret_cast<char*>(&nval
) + 1, 7);
342 fb_compact_serialize_code(sb
, FB_CS_INT64
);
343 uint64_t nval
= htonll(val
);
344 sb
.append(reinterpret_cast<char*>(&nval
), 8);
348 static void fb_compact_serialize_string(StringBuffer
& sb
, const String
& str
) {
349 int len
= str
.size();
351 fb_compact_serialize_code(sb
, FB_CS_STRING_0
);
354 fb_compact_serialize_code(sb
, FB_CS_STRING_1
);
356 fb_compact_serialize_code(sb
, FB_CS_STRING_N
);
357 fb_compact_serialize_int64(sb
, len
);
359 sb
.append(str
.data(), len
);
363 static bool fb_compact_serialize_is_list(const Array
& arr
, int64_t& index_limit
) {
364 index_limit
= arr
.size();
365 int64_t max_index
= 0;
366 for (ArrayIter
it(arr
); it
; ++it
) {
367 Variant key
= it
.first();
368 if (!key
.isNumeric()) {
371 int64_t index
= key
.toInt64();
375 if (index
> max_index
) {
380 if (max_index
>= arr
.size() * 2) {
381 // Might as well store it as a map
385 index_limit
= max_index
+ 1;
389 static int fb_compact_serialize_variant(StringBuffer
& sd
,
390 const Variant
& var
, int depth
, FBCompactSerializeBehavior behavior
);
392 static void fb_compact_serialize_array_as_list_map(
393 StringBuffer
& sb
, const Array
& arr
, int64_t index_limit
, int depth
,
394 FBCompactSerializeBehavior behavior
) {
395 fb_compact_serialize_code(sb
, FB_CS_LIST_MAP
);
396 for (int64_t i
= 0; i
< index_limit
; ++i
) {
398 fb_compact_serialize_variant(sb
, arr
[i
], depth
+ 1, behavior
);
400 fb_compact_serialize_code(sb
, FB_CS_SKIP
);
403 fb_compact_serialize_code(sb
, FB_CS_STOP
);
406 static void fb_compact_serialize_vec(
407 StringBuffer
& sb
, const Array
& arr
, int depth
,
408 FBCompactSerializeBehavior behavior
) {
409 fb_compact_serialize_code(sb
, FB_CS_LIST_MAP
);
410 PackedArray::IterateV(
412 [&](const TypedValue
* v
) {
413 fb_compact_serialize_variant(sb
, tvAsCVarRef(v
), depth
+ 1, behavior
);
416 fb_compact_serialize_code(sb
, FB_CS_STOP
);
419 static void fb_compact_serialize_array_as_map(
420 StringBuffer
& sb
, const Array
& arr
, int depth
,
421 FBCompactSerializeBehavior behavior
) {
422 fb_compact_serialize_code(sb
, FB_CS_MAP
);
425 [&](const TypedValue
* k
, const TypedValue
* v
) {
427 fb_compact_serialize_string(sb
, StrNR
{k
->m_data
.pstr
});
429 assertx(k
->m_type
== KindOfInt64
);
430 fb_compact_serialize_int64(sb
, k
->m_data
.num
);
432 fb_compact_serialize_variant(sb
, tvAsCVarRef(v
), depth
+ 1, behavior
);
435 fb_compact_serialize_code(sb
, FB_CS_STOP
);
438 static void fb_compact_serialize_keyset(
439 StringBuffer
& sb
, const Array
& arr
, FBCompactSerializeBehavior behavior
) {
440 fb_compact_serialize_code(sb
, FB_CS_MAP
);
442 SetArray::asSet(arr
.get()),
443 [&](const TypedValue
* v
) {
445 fb_compact_serialize_string(sb
, StrNR
{v
->m_data
.pstr
});
446 fb_compact_serialize_string(sb
, StrNR
{v
->m_data
.pstr
});
448 assertx(v
->m_type
== KindOfInt64
);
449 fb_compact_serialize_int64(sb
, v
->m_data
.num
);
450 fb_compact_serialize_int64(sb
, v
->m_data
.num
);
454 fb_compact_serialize_code(sb
, FB_CS_STOP
);
457 static int fb_compact_serialize_variant(StringBuffer
& sb
,
460 FBCompactSerializeBehavior behavior
) {
462 if (behavior
== FBCompactSerializeBehavior::MemoizeParam
) {
463 SystemLib::throwInvalidArgumentExceptionObject(
464 "Array depth exceeded");
470 switch (var
.getType()) {
473 fb_compact_serialize_code(sb
, FB_CS_NULL
);
478 fb_compact_serialize_code(sb
, FB_CS_TRUE
);
480 fb_compact_serialize_code(sb
, FB_CS_FALSE
);
485 fb_compact_serialize_int64(sb
, var
.toInt64());
489 fb_compact_serialize_code(sb
, FB_CS_DOUBLE
);
490 double d
= var
.toDouble();
491 sb
.append(reinterpret_cast<char*>(&d
), 8);
495 case KindOfPersistentString
:
497 fb_compact_serialize_string(sb
, var
.toString());
500 case KindOfPersistentVec
:
502 Array arr
= var
.toArray();
503 assert(arr
->isVecArray());
504 fb_compact_serialize_vec(sb
, std::move(arr
), depth
, behavior
);
508 case KindOfPersistentDict
:
510 Array arr
= var
.toArray();
511 assert(arr
->isDict());
512 fb_compact_serialize_array_as_map(sb
, std::move(arr
), depth
, behavior
);
516 case KindOfPersistentKeyset
:
518 Array arr
= var
.toArray();
519 assert(arr
->isKeyset());
520 fb_compact_serialize_keyset(sb
, std::move(arr
), behavior
);
524 case KindOfPersistentArray
:
526 Array arr
= var
.toArray();
527 assert(arr
->isPHPArray());
529 if (fb_compact_serialize_is_list(arr
, index_limit
)) {
530 fb_compact_serialize_array_as_list_map(sb
, std::move(arr
), index_limit
,
533 fb_compact_serialize_array_as_map(sb
, std::move(arr
), depth
, behavior
);
539 if (behavior
== FBCompactSerializeBehavior::MemoizeParam
) {
540 Object obj
= var
.toObject();
542 if (obj
->isCollection()) {
543 fb_compact_serialize_variant(sb
, obj
->toArray(), depth
, behavior
);
547 if (!obj
.instanceof(s_IMemoizeParam
)) {
548 auto msg
= folly::format(
549 "Cannot serialize object of type {} because it does not implement "
551 obj
->getClassName().asString()).str();
553 SystemLib::throwInvalidArgumentExceptionObject(msg
);
556 // Marker that shows that this was an obj so it doesn't collide with
558 fb_compact_serialize_code(sb
, FB_CS_OBJ
);
560 Variant ser
= obj
->o_invoke_few_args(s_getInstanceKey
, 0);
561 fb_compact_serialize_string(sb
, ser
.toString());
565 // If not FBCompactSerializeBehavior::MemoizeParam fall-through to default
570 fb_compact_serialize_code(sb
, FB_CS_NULL
);
572 "fb_compact_serialize(): unable to serialize object/resource/ref/class"
577 if (behavior
== FBCompactSerializeBehavior::MemoizeParam
) {
578 SystemLib::throwInvalidArgumentExceptionObject(
579 folly::format("Cannot Serialize unexpected type {}",
580 tname(var
.getType())).str()
586 String
fb_compact_serialize(const Variant
& thing
,
587 FBCompactSerializeBehavior behavior
) {
589 * If thing is a single int value [0, 127] normally we would serialize
590 * it as a single byte (7 bit unsigned int).
592 * However, we want highest bit of the first byte to always be set so
593 * that we can tell if the string is fb_serialize'd or fb_compact_serialize'd.
595 * So we force to serialize it as 13 bit unsigned int instead.
597 if (thing
.getType() == KindOfInt64
) {
598 int64_t val
= thing
.toInt64();
599 if (val
>= 0 && (uint64_t)val
<= kInt7Mask
) {
600 String
s(2, ReserveString
);
601 *(uint16_t*)(s
.mutableData()) = (uint16_t)htons(kInt13Prefix
| val
);
608 if (fb_compact_serialize_variant(sb
, thing
, 0, behavior
)) {
615 Variant
HHVM_FUNCTION(fb_compact_serialize
, const Variant
& thing
) {
616 return fb_compact_serialize(thing
, FBCompactSerializeBehavior::Base
);
/*
 * Check if there are enough bytes left in the buffer.
 *
 * bytes: number of bytes about to be consumed
 * pos:   current read offset into the buffer
 * num:   total size of the buffer
 *
 * Makes the enclosing function return FB_UNSERIALIZE_UNEXPECTED_END when the
 * request cannot be satisfied.  The comparison is done in 64 bits and rejects
 * negative counts: `bytes` may be an int64_t length decoded from the input
 * itself, and narrowing it to int would let a negative or >INT_MAX length
 * slip past the check and read outside the buffer.
 */
#define CHECK_ENOUGH(bytes, pos, num) do {                              \
    if ((int64_t)(bytes) < 0 ||                                         \
        (int64_t)(bytes) > (int64_t)(num) - (int64_t)(pos)) {           \
      return FB_UNSERIALIZE_UNEXPECTED_END;                             \
    }                                                                   \
  } while (0)
627 int fb_compact_unserialize_int64_from_buffer(
628 int64_t& out
, const char* buf
, int n
, int& p
) {
630 CHECK_ENOUGH(1, p
, n
);
631 uint64_t first
= (unsigned char)buf
[p
];
632 if ((first
& ~kInt7Mask
) == kInt7Prefix
) {
634 out
= first
& kInt7Mask
;
636 } else if ((first
& kInt13PrefixMsbMask
) == kInt13PrefixMsb
) {
637 CHECK_ENOUGH(2, p
, n
);
638 uint16_t val
= (uint16_t)ntohs(*reinterpret_cast<const uint16_t*>(buf
+ p
));
640 out
= val
& kInt13Mask
;
642 } else if (first
== (kCodePrefix
| FB_CS_INT16
)) {
644 CHECK_ENOUGH(2, p
, n
);
645 int16_t val
= (int16_t)ntohs(*reinterpret_cast<const int16_t*>(buf
+ p
));
649 } else if ((first
& kInt20PrefixMsbMask
) == kInt20PrefixMsb
) {
650 CHECK_ENOUGH(3, p
, n
);
652 memcpy(&b
, buf
+ p
, 3);
653 uint32_t val
= ntohl(b
);
655 out
= (val
>> 8) & kInt20Mask
;
657 } else if (first
== (kCodePrefix
| FB_CS_INT32
)) {
659 CHECK_ENOUGH(4, p
, n
);
660 int32_t val
= (int32_t)ntohl(*reinterpret_cast<const int32_t*>(buf
+ p
));
664 } else if ((first
& kInt54PrefixMsbMask
) == kInt54PrefixMsb
) {
665 CHECK_ENOUGH(7, p
, n
);
667 memcpy(&b
, buf
+ p
, 7);
668 uint64_t val
= ntohll(b
);
670 out
= (val
>> 8) & kInt54Mask
;
672 } else if (first
== (kCodePrefix
| FB_CS_INT64
)) {
674 CHECK_ENOUGH(8, p
, n
);
675 int64_t val
= (int64_t)ntohll(*reinterpret_cast<const int64_t*>(buf
+ p
));
680 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE
;
686 const StaticString
s_empty("");
688 int fb_compact_unserialize_from_buffer(
689 Variant
& out
, const char* buf
, int n
, int& p
) {
691 CHECK_ENOUGH(1, p
, n
);
692 int code
= (unsigned char)buf
[p
];
693 if ((code
& ~kCodeMask
) != kCodePrefix
||
694 (code
& kCodeMask
) == FB_CS_INT16
||
695 (code
& kCodeMask
) == FB_CS_INT32
||
696 (code
& kCodeMask
) == FB_CS_INT64
) {
699 int err
= fb_compact_unserialize_int64_from_buffer(val
, buf
, n
, p
);
723 CHECK_ENOUGH(8, p
, n
);
724 double d
= *reinterpret_cast<const double*>(buf
+ p
);
740 if (code
== FB_CS_STRING_N
) {
741 int err
= fb_compact_unserialize_int64_from_buffer(len
, buf
, n
, p
);
747 CHECK_ENOUGH(len
, p
, n
);
748 out
= Variant::attach(StringData::Make(buf
+ p
, len
, CopyString
));
756 Array arr
= Array::Create();
758 while (p
< n
&& buf
[p
] != (char)(kCodePrefix
| FB_CS_STOP
)) {
759 if (buf
[p
] == (char)(kCodePrefix
| FB_CS_SKIP
)) {
764 int err
= fb_compact_unserialize_from_buffer(value
, buf
, n
, p
);
773 CHECK_ENOUGH(1, p
, n
);
782 Array arr
= Array::Create();
783 while (p
< n
&& buf
[p
] != (char)(kCodePrefix
| FB_CS_STOP
)) {
785 int err
= fb_compact_unserialize_from_buffer(key
, buf
, n
, p
);
790 err
= fb_compact_unserialize_from_buffer(value
, buf
, n
, p
);
794 if (key
.getType() == KindOfInt64
) {
795 arr
.set(key
.toInt64(), value
);
796 } else if (key
.getType() == KindOfString
||
797 key
.getType() == KindOfPersistentString
) {
800 return FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE
;
805 CHECK_ENOUGH(1, p
, n
);
813 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE
;
819 Variant
fb_compact_unserialize(const char* str
, int len
,
821 VRefParam errcode
/* = uninit_variant */) {
825 int err
= fb_compact_unserialize_from_buffer(ret
, str
, len
, p
);
827 success
.assignIfRef(false);
828 errcode
.assignIfRef(err
);
831 success
.assignIfRef(true);
832 errcode
.assignIfRef(init_null());
836 Variant
HHVM_FUNCTION(fb_compact_unserialize
,
837 const Variant
& thing
, VRefParam success
,
838 VRefParam errcode
/* = uninit_variant */) {
839 if (!thing
.isString()) {
840 success
.assignIfRef(false);
841 errcode
.assignIfRef(FB_UNSERIALIZE_NONSTRING_VALUE
);
845 String s
= thing
.toString();
846 return fb_compact_unserialize(s
.data(), s
.size(), ref(success
),
850 ///////////////////////////////////////////////////////////////////////////////
852 bool HHVM_FUNCTION(fb_utf8ize
, VRefParam input
) {
853 String s
= input
.toString();
854 const char* const srcBuf
= s
.data();
855 int32_t srcLenBytes
= s
.size();
857 if (s
.size() < 0 || s
.size() > INT_MAX
) {
858 return false; // Too long.
861 // Preflight to avoid allocation if the entire input is valid.
863 for (srcPosBytes
= 0; srcPosBytes
< srcLenBytes
; /* U8_NEXT increments */) {
864 // This is lame, but gcc doesn't optimize U8_NEXT very well
865 if (srcBuf
[srcPosBytes
] > 0 && srcBuf
[srcPosBytes
] <= 0x7f) {
866 srcPosBytes
++; // U8_NEXT would increment this
869 UChar32 curCodePoint
;
870 // U8_NEXT() always advances srcPosBytes; save in case curCodePoint invalid
871 int32_t savedSrcPosBytes
= srcPosBytes
;
872 U8_NEXT(srcBuf
, srcPosBytes
, srcLenBytes
, curCodePoint
);
873 if (curCodePoint
<= 0) {
874 // curCodePoint invalid; back up so we'll fix it in the loop below.
875 srcPosBytes
= savedSrcPosBytes
;
880 if (srcPosBytes
== srcLenBytes
) {
885 // There are invalid bytes. Allocate memory, then copy the input, replacing
886 // invalid sequences with either the substitution character or nothing,
887 // depending on the value of RuntimeOption::Utf8izeReplace.
889 // Worst case, every remaining byte is invalid, taking a 3-byte substitution.
890 int32_t bytesRemaining
= srcLenBytes
- srcPosBytes
;
891 uint64_t dstMaxLenBytes
= srcPosBytes
+ (RuntimeOption::Utf8izeReplace
?
892 bytesRemaining
* U8_LENGTH(SUBSTITUTION_CHARACTER
) :
894 if (dstMaxLenBytes
> INT_MAX
) {
895 return false; // Too long.
897 String
dstStr(dstMaxLenBytes
, ReserveString
);
898 char *dstBuf
= dstStr
.mutableData();
900 // Copy valid bytes found so far as one solid block.
901 memcpy(dstBuf
, srcBuf
, srcPosBytes
);
903 // Iterate through the remaining bytes.
904 int32_t dstPosBytes
= srcPosBytes
; // already copied srcPosBytes
905 for (/* already init'd */; srcPosBytes
< srcLenBytes
; /* see U8_NEXT */) {
906 UChar32 curCodePoint
;
907 // This is lame, but gcc doesn't optimize U8_NEXT very well
908 if (srcBuf
[srcPosBytes
] > 0 && srcBuf
[srcPosBytes
] <= 0x7f) {
909 curCodePoint
= srcBuf
[srcPosBytes
++]; // U8_NEXT would increment
911 U8_NEXT(srcBuf
, srcPosBytes
, srcLenBytes
, curCodePoint
);
913 if (curCodePoint
<= 0) {
914 // Invalid UTF-8 sequence.
915 // N.B. We consider a null byte an invalid sequence.
916 if (!RuntimeOption::Utf8izeReplace
) {
917 continue; // Omit invalid sequence
919 curCodePoint
= SUBSTITUTION_CHARACTER
; // Replace invalid sequences
921 // We know that resultBuffer > total possible length.
922 U8_APPEND_UNSAFE(dstBuf
, dstPosBytes
, curCodePoint
);
924 assert(dstPosBytes
<= dstMaxLenBytes
);
925 input
.assignIfRef(dstStr
.shrink(dstPosBytes
));
930 * Private utf8_strlen implementation.
932 * Returns count of code points in input, substituting 1 code point per invalid
935 * deprecated=true: instead return byte count on invalid UTF-8 sequence.
937 static int fb_utf8_strlen_impl(const String
& input
, bool deprecated
) {
938 // Count, don't modify.
939 int32_t sourceLength
= input
.size();
940 const char* const sourceBuffer
= input
.data();
941 int64_t num_code_points
= 0;
943 for (int32_t sourceOffset
= 0; sourceOffset
< sourceLength
; ) {
944 UChar32 sourceCodePoint
;
945 // U8_NEXT() is guaranteed to advance sourceOffset by 1-4 each time it's
947 U8_NEXT(sourceBuffer
, sourceOffset
, sourceLength
, sourceCodePoint
);
948 if (deprecated
&& sourceCodePoint
< 0) {
949 return sourceLength
; // return byte count on invalid sequence
953 return num_code_points
;
956 int64_t HHVM_FUNCTION(fb_utf8_strlen
, const String
& input
) {
957 return fb_utf8_strlen_impl(input
, /* deprecated */ false);
960 int64_t HHVM_FUNCTION(fb_utf8_strlen_deprecated
, const String
& input
) {
961 return fb_utf8_strlen_impl(input
, /* deprecated */ true);
965 * Private helper; requires non-negative firstCodePoint and desiredCodePoints.
967 static String
fb_utf8_substr_simple(const String
& str
,
968 int32_t firstCodePoint
,
969 int32_t numDesiredCodePoints
) {
970 const char* const srcBuf
= str
.data();
971 int32_t srcLenBytes
= str
.size(); // May truncate; checked before use below.
973 assert(firstCodePoint
>= 0); // Wrapper fixes up negative starting positions.
974 assert(numDesiredCodePoints
> 0); // Wrapper fixes up negative/zero length.
975 if (str
.size() <= 0 ||
976 str
.size() > INT_MAX
||
977 firstCodePoint
>= srcLenBytes
) {
978 return empty_string();
981 // Cannot be more code points than bytes in input. This typically reduces
982 // the INT_MAX default value to something more reasonable.
983 numDesiredCodePoints
= std::min(numDesiredCodePoints
,
984 srcLenBytes
- firstCodePoint
);
986 // Pre-allocate the result.
987 // the worst case can come from one of two sources:
988 // - every code point could be the substitution char (3 bytes)
989 // giving us numDesiredCodePoints * 3
990 // - every code point could be 4 bytes long, giving us
991 // numDesiredCodePoints * 4 - but capped by the length of the input
992 uint64_t dstMaxLenBytes
=
993 std::min((uint64_t)numDesiredCodePoints
* 4,
994 (uint64_t)srcLenBytes
- firstCodePoint
);
995 dstMaxLenBytes
= std::max(dstMaxLenBytes
,
996 (uint64_t)numDesiredCodePoints
*
997 U8_LENGTH(SUBSTITUTION_CHARACTER
));
998 if (dstMaxLenBytes
> INT_MAX
) {
999 return empty_string(); // Too long.
1001 String
dstStr(dstMaxLenBytes
, ReserveString
);
1002 char* dstBuf
= dstStr
.mutableData();
1003 int32_t dstPosBytes
= 0;
1005 // Iterate through src's codepoints; srcPosBytes is incremented by U8_NEXT.
1006 for (int32_t srcPosBytes
= 0, srcPosCodePoints
= 0;
1007 srcPosBytes
< srcLenBytes
&& // more available
1008 srcPosCodePoints
< firstCodePoint
+ numDesiredCodePoints
; // want more
1009 srcPosCodePoints
++) {
1011 // U8_NEXT() advances sourceBytePos by 1-4 each time it's invoked.
1012 UChar32 curCodePoint
;
1013 U8_NEXT(srcBuf
, srcPosBytes
, srcLenBytes
, curCodePoint
);
1015 if (srcPosCodePoints
>= firstCodePoint
) {
1016 // Copy this code point into the result.
1017 if (curCodePoint
< 0) {
1018 curCodePoint
= SUBSTITUTION_CHARACTER
; // replace invalid sequences
1020 // We know that resultBuffer > total possible length.
1021 // U8_APPEND_UNSAFE updates dstPosBytes.
1022 U8_APPEND_UNSAFE(dstBuf
, dstPosBytes
, curCodePoint
);
1026 assert(dstPosBytes
<= dstMaxLenBytes
);
1027 if (dstPosBytes
> 0) {
1028 dstStr
.shrink(dstPosBytes
);
1031 return empty_string();
1034 String
HHVM_FUNCTION(fb_utf8_substr
, const String
& str
, int64_t start
,
1035 int64_t length
/* = INT_MAX */) {
1036 if (length
> INT_MAX
) {
1039 // For negative start or length, calculate start and length values
1040 // based on total code points.
1041 if (start
< 0 || length
< 0) {
1042 // Get number of code points assuming we substitute invalid sequences.
1043 Variant utf8StrlenResult
= HHVM_FN(fb_utf8_strlen
)(str
);
1044 int32_t sourceNumCodePoints
= utf8StrlenResult
.toInt32();
1047 // Negative means first character is start'th code point from end.
1048 // e.g., -1 means start with the last code point.
1049 start
= sourceNumCodePoints
+ start
; // adding negative start
1052 // Negative means omit last abs(length) code points.
1053 length
= sourceNumCodePoints
- start
+ length
; // adding negative length
1056 if (start
< 0 || length
<= 0) {
1057 return empty_string(); // Empty result
1060 return fb_utf8_substr_simple(str
, start
, length
);
1063 ///////////////////////////////////////////////////////////////////////////////
1065 bool HHVM_FUNCTION(fb_intercept
, const String
& name
, const Variant
& handler
,
1066 const Variant
& data
/* = uninit_variant */) {
1067 return register_intercept(name
, handler
, data
);
1070 bool is_dangerous_varenv_function(const StringData
* name
) {
1071 auto const f
= Unit::lookupFunc(name
);
1072 // Functions can which can access the caller's frame are always builtin, so if
1073 // its not already defined, we know it can't be one.
1074 return f
&& f
->accessesCallerFrame();
1077 bool HHVM_FUNCTION(fb_rename_function
, const String
& orig_func_name
,
1078 const String
& new_func_name
) {
1079 if (orig_func_name
.empty() || new_func_name
.empty() ||
1080 orig_func_name
.get()->isame(new_func_name
.get())) {
1081 throw_invalid_argument("unable to rename %s", orig_func_name
.data());
1085 if (!function_exists(orig_func_name
)) {
1086 raise_warning("fb_rename_function(%s, %s) failed: %s does not exist!",
1087 orig_func_name
.data(), new_func_name
.data(),
1088 orig_func_name
.data());
1092 if (is_dangerous_varenv_function(orig_func_name
.get())) {
1094 "fb_rename_function(%s, %s) failed: rename of functions that "
1095 "affect variable environments is not allowed",
1096 orig_func_name
.data(), new_func_name
.data());
1100 if (function_exists(new_func_name
)) {
1101 if (new_func_name
.data()[0] != '1') {
1102 raise_warning("fb_rename_function(%s, %s) failed: %s already exists!",
1103 orig_func_name
.data(), new_func_name
.data(),
1104 new_func_name
.data());
1109 rename_function(orig_func_name
, new_func_name
);
1113 ///////////////////////////////////////////////////////////////////////////////
1114 // call_user_func extensions
1115 // Linked in via fb.json.idl for now - Need OptFunc solution...
1117 Array
HHVM_FUNCTION(fb_call_user_func_safe
,
1118 const Variant
& function
,
1119 const Array
& argv
) {
1120 return HHVM_FN(fb_call_user_func_array_safe
)(function
, argv
);
1123 Variant
HHVM_FUNCTION(fb_call_user_func_safe_return
,
1124 const Variant
& function
,
1126 const Array
& argv
) {
1127 if (is_callable(function
)) {
1128 return vm_call_user_func(function
, argv
);
1133 Array
HHVM_FUNCTION(fb_call_user_func_array_safe
,
1134 const Variant
& function
,
1135 const Array
& params
) {
1136 if (is_callable(function
)) {
1137 return make_packed_array(true, vm_call_user_func(function
, params
));
1139 return make_packed_array(false, uninit_variant
);
1142 ///////////////////////////////////////////////////////////////////////////////
1144 Variant
HHVM_FUNCTION(fb_get_code_coverage
, bool flush
) {
1145 ThreadInfo
*ti
= ThreadInfo::s_threadInfo
.getNoCheck();
1146 if (ti
->m_reqInjectionData
.getCoverage()) {
1147 Array ret
= ti
->m_coverage
->Report();
1149 ti
->m_coverage
->Reset();
1156 void HHVM_FUNCTION(fb_enable_code_coverage
) {
1157 ThreadInfo
*ti
= ThreadInfo::s_threadInfo
.getNoCheck();
1158 ti
->m_coverage
->Reset();
1159 ti
->m_reqInjectionData
.setCoverage(true);;
1160 if (g_context
->isNested()) {
1161 raise_notice("Calling fb_enable_code_coverage from a nested "
1162 "VM instance may cause unpredicable results");
1164 throw VMSwitchModeBuiltin();
1167 Variant
HHVM_FUNCTION(fb_disable_code_coverage
) {
1168 ThreadInfo
*ti
= ThreadInfo::s_threadInfo
.getNoCheck();
1169 ti
->m_reqInjectionData
.setCoverage(false);
1170 Array ret
= ti
->m_coverage
->Report();
1171 ti
->m_coverage
->Reset();
1175 ///////////////////////////////////////////////////////////////////////////////
1177 bool HHVM_FUNCTION(fb_output_compression
, bool new_value
) {
1178 Transport
*transport
= g_context
->getTransport();
1180 bool rv
= transport
->isCompressionEnabled();
1182 transport
->enableCompression();
1184 transport
->disableCompression();
1191 void HHVM_FUNCTION(fb_set_exit_callback
, const Variant
& function
) {
1192 g_context
->setExitCallback(function
);
1196 s_flush_stats("flush_stats"),
1197 s_chunk_stats("chunk_stats"),
1202 int64_t HHVM_FUNCTION(fb_get_last_flush_size
) {
1203 Transport
*transport
= g_context
->getTransport();
1204 return transport
? transport
->getLastChunkSentSize() : 0;
1207 extern Array
stat_impl(struct stat
*); // ext_file.cpp
1209 template<class Function
>
1210 static Variant
do_lazy_stat(Function dostat
, const String
& filename
) {
1212 if (dostat(File::TranslatePathWithFileCache(filename
).c_str(), &sb
)) {
1213 Logger::Verbose("%s/%d: %s", __FUNCTION__
, __LINE__
,
1214 folly::errnoStr(errno
).c_str());
1217 return stat_impl(&sb
);
1220 Variant
HHVM_FUNCTION(fb_lazy_lstat
, const String
& filename
) {
1221 if (!FileUtil::checkPathAndWarn(filename
, __FUNCTION__
+ 2, 1)) {
1224 return do_lazy_stat(StatCache::lstat
, filename
);
1227 Variant
HHVM_FUNCTION(fb_lazy_realpath
, const String
& filename
) {
1228 if (!FileUtil::checkPathAndWarn(filename
, __FUNCTION__
+ 2, 1)) {
1232 return StatCache::realpath(filename
.c_str());
1235 ///////////////////////////////////////////////////////////////////////////////
1239 // TODO(8117903): Unused; remove after updating www side.
1242 ///////////////////////////////////////////////////////////////////////////////
1244 struct FBExtension
: Extension
{
1245 FBExtension(): Extension("fb", "1.0.0") {}
1247 void moduleInit() override
{
1248 HHVM_RC_BOOL_SAME(HHVM_FACEBOOK
);
1249 HHVM_RC_INT_SAME(FB_UNSERIALIZE_NONSTRING_VALUE
);
1250 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_END
);
1251 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE
);
1252 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE
);
1254 HHVM_RC_INT(FB_SERIALIZE_HACK_ARRAYS
, k_FB_SERIALIZE_HACK_ARRAYS
);
1256 HHVM_FE(fb_serialize
);
1257 HHVM_FE(fb_unserialize
);
1258 HHVM_FE(fb_compact_serialize
);
1259 HHVM_FE(fb_compact_unserialize
);
1260 HHVM_FE(fb_utf8ize
);
1261 HHVM_FE(fb_utf8_strlen
);
1262 HHVM_FE(fb_utf8_strlen_deprecated
);
1263 HHVM_FE(fb_utf8_substr
);
1264 HHVM_FE(fb_intercept
);
1265 HHVM_FE(fb_rename_function
);
1266 HHVM_FE(fb_get_code_coverage
);
1267 HHVM_FE(fb_enable_code_coverage
);
1268 HHVM_FE(fb_disable_code_coverage
);
1269 HHVM_FE(fb_output_compression
);
1270 HHVM_FE(fb_set_exit_callback
);
1271 HHVM_FE(fb_get_last_flush_size
);
1272 HHVM_FE(fb_lazy_lstat
);
1273 HHVM_FE(fb_lazy_realpath
);
1274 HHVM_FE(fb_call_user_func_safe
);
1275 HHVM_FE(fb_call_user_func_safe_return
);
1276 HHVM_FE(fb_call_user_func_array_safe
);
1282 ///////////////////////////////////////////////////////////////////////////////