2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/fb/ext_fb.h"
21 #include <unicode/uchar.h>
22 #include <unicode/utf8.h>
28 #include <folly/String.h>
29 #include <folly/portability/Sockets.h>
31 #include "hphp/util/htonll.h"
32 #include "hphp/util/logger.h"
33 #include "hphp/runtime/base/array-init.h"
34 #include "hphp/runtime/base/builtin-functions.h"
35 #include "hphp/runtime/base/code-coverage.h"
36 #include "hphp/runtime/base/externals.h"
37 #include "hphp/runtime/base/file.h"
38 #include "hphp/runtime/base/file-util.h"
39 #include "hphp/runtime/base/plain-file.h"
40 #include "hphp/runtime/base/unit-cache.h"
41 #include "hphp/runtime/base/intercept.h"
42 #include "hphp/runtime/base/runtime-option.h"
43 #include "hphp/runtime/base/stat-cache.h"
44 #include "hphp/runtime/base/string-buffer.h"
45 #include "hphp/runtime/base/string-util.h"
46 #include "hphp/runtime/base/request-info.h"
47 #include "hphp/runtime/base/tv-type.h"
48 #include "hphp/runtime/ext/std/ext_std_function.h"
49 #include "hphp/runtime/ext/fb/FBSerialize/FBSerialize.h"
50 #include "hphp/runtime/ext/fb/VariantController.h"
51 #include "hphp/runtime/vm/unwind.h"
52 #include "hphp/zend/zend-string.h"
// Option bit accepted by fb_serialize() / fb_unserialize(): when set, the
// Hack-array-aware variant controller is used instead of the default one.
const int64_t k_FB_SERIALIZE_HACK_ARRAYS = int64_t{1} << 1;
59 ///////////////////////////////////////////////////////////////////////////////
61 static const UChar32 SUBSTITUTION_CHARACTER
= 0xFFFD;
// Error codes reported through the `errcode` out-parameter of
// fb_compact_unserialize().
//   NONSTRING_VALUE:           the value to unserialize was not a string
//   UNEXPECTED_END:            the buffer ended before the value was complete
//   UNRECOGNIZED_OBJECT_TYPE:  the type byte is not a known code
//   UNEXPECTED_ARRAY_KEY_TYPE: a map key was neither an int nor a string
#define FB_UNSERIALIZE_NONSTRING_VALUE            1
#define FB_UNSERIALIZE_UNEXPECTED_END             2
#define FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE   3
#define FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE  4
69 # define HHVM_FACEBOOK true
71 # define HHVM_FACEBOOK false
74 ///////////////////////////////////////////////////////////////////////////////
76 /* enum of thrift types */
/*
 * Return the smallest size int (in bytes: 1, 2, 4 or 8) that can store the
 * value.
 *
 * The argument is fully parenthesized inside every cast so that compound
 * expressions (e.g. INT_SIZE(a + b)) are cast as a whole rather than having
 * the cast bind only to their first operand. Note that x is evaluated more
 * than once, so it must not have side effects.
 */
#define INT_SIZE(x) (((x) == ((int8_t)(x)))  ? 1 : \
                     ((x) == ((int16_t)(x))) ? 2 : \
                     ((x) == ((int32_t)(x))) ? 4 : 8)

/*
 * Return the smallest (supported) unsigned length (in bytes: 1 or 4) that can
 * store the value. As with INT_SIZE, the argument is parenthesized inside the
 * casts and is evaluated more than once.
 */
#define LEN_SIZE(x) ((((unsigned)(x)) == ((uint8_t)(x))) ? 1 : 4)
105 Variant
HHVM_FUNCTION(fb_serialize
, const Variant
& thing
, int64_t options
) {
107 if (options
& k_FB_SERIALIZE_HACK_ARRAYS
) {
108 size_t len
= HPHP::serialize
109 ::FBSerializer
<VariantControllerUsingHackArrays
>
110 ::serializedSize(thing
);
111 String
s(len
, ReserveString
);
113 ::FBSerializer
<VariantControllerUsingHackArrays
>
114 ::serialize(thing
, s
.mutableData());
119 HPHP::serialize::FBSerializer
<VariantController
>::serializedSize(thing
);
120 String
s(len
, ReserveString
);
121 HPHP::serialize::FBSerializer
<VariantController
>::serialize(
122 thing
, s
.mutableData());
126 } catch (const HPHP::serialize::KeysetSerializeError
&) {
127 SystemLib::throwInvalidArgumentExceptionObject(
128 "Keysets cannot be serialized with fb_serialize"
130 } catch (const HPHP::serialize::HackArraySerializeError
&) {
131 SystemLib::throwInvalidArgumentExceptionObject(
132 "Serializing Hack arrays requires the FB_SERIALIZE_HACK_ARRAYS "
133 "option to be provided"
135 } catch (const HPHP::serialize::SerializeError
&) {
140 Variant
HHVM_FUNCTION(fb_unserialize
,
141 const Variant
& thing
,
144 if (thing
.isString()) {
145 String sthing
= thing
.toString();
147 if (sthing
.size() && (sthing
.data()[0] & 0x80)) {
148 return fb_compact_unserialize(sthing
.data(), sthing
.size(),
151 return fb_unserialize(sthing
.data(), sthing
.size(), success
, options
);
155 success
.assignIfRef(false);
159 Variant
fb_unserialize(const char* str
,
164 if (options
& k_FB_SERIALIZE_HACK_ARRAYS
) {
165 auto res
= HPHP::serialize
166 ::FBUnserializer
<VariantControllerUsingHackArrays
>
167 ::unserialize(folly::StringPiece(str
, len
));
168 success
.assignIfRef(true);
171 auto res
= HPHP::serialize::FBUnserializer
<VariantController
>
172 ::unserialize(folly::StringPiece(str
, len
));
173 success
.assignIfRef(true);
176 } catch (const HPHP::serialize::UnserializeError
&) {
177 success
.assignIfRef(false);
182 ///////////////////////////////////////////////////////////////////////////////
185 * FB Compact Serialize
186 * ====================
188 * === Compatibility with fb_unserialize ===
190 * Check the high bit in the first byte of the serialized string.
191 * If it's set, the string is fb_compact_serialize'd, otherwise it's
196 * A value is serialized as a string <c> <data> where c is a byte (0xf0 | code),
199 * 0 (INT16): data is 2 bytes, network order signed int16
200 * 1 (INT32): data is 4 bytes, network order signed int32
201 * 2 (INT64): data is 8 bytes, network order signed int64
202 * All of these represent an int64 value.
204 * 3 (NULL): no data, null value
207 * 5 (FALSE): no data, boolean value
209 * 6 (DOUBLE): data is 8 bytes, double value
211 * 7 (STRING_0): no data
212 * 8 (STRING_1): one char of data
213 * 9 (STRING_N): followed by n as a serialized int64, followed by n characters
214 * All of these represent a string value.
216 * 10 (LIST_MAP): followed by serialized values until STOP is seen.
217 * Represents a map with numeric keys 0, 1, ..., n-1 (but see SKIP below).
219 * 11 (MAP): followed by serialized key/value pairs until STOP
220 * is seen. Represents a map with arbitrary int64 or string keys.
223 * Marks the end of a LIST or a MAP.
226 * If seen as an entry in a LIST_MAP, the next index in the sequence will
 *   be skipped. E.g. array(0 => 'a', 1 => 'b', 3 => 'c') will be encoded as
228 * (LIST_MAP, 'a', 'b', SKIP, 'c') instead of
229 * (MAP, 0, 'a', 1, 'b', 3, 'c').
231 * 14 (VECTOR): followed by n serialized values until STOP is seen.
232 * Represents a vector of n values.
234 * In addition, if <c> & 0xf0 != 0xf0, most significant bits of <c> mean:
236 * - 0....... 7-bit unsigned int
237 * (NOTE: not used for the sole int value due to the compatibility
239 * - 10...... + 6 more bytes, 54-bit unsigned int
240 * - 110..... + 1 more byte, 13-bit unsigned int
241 * - 1110.... + 2 more bytes, 20-bit unsigned int
243 * All of these represent an int64 value.
246 enum FbCompactSerializeCode
{
// Wire-format constants for the compact integer encodings. For each width:
//   *Mask          - the payload bits of the decoded value
//   *PrefixMsbMask - the bits of the first byte that hold the tag
//   *PrefixMsb     - the expected tag value within the first byte
//   *Prefix        - the tag shifted to the top byte of the whole encoding

// 1 byte: 0<7 bits>
constexpr uint64_t kInt7Mask   = 0x7f;
constexpr uint64_t kInt7Prefix = 0x00;

// 2 bytes: 110<13 bits>
constexpr uint64_t kInt13Mask          = (uint64_t{1} << 13) - 1;
constexpr uint64_t kInt13PrefixMsbMask = 0xe0;
constexpr uint64_t kInt13PrefixMsb     = 0xc0;
constexpr uint64_t kInt13Prefix        = kInt13PrefixMsb << 8;

// 3 bytes: 1110<20 bits>
constexpr uint64_t kInt20Mask          = (uint64_t{1} << 20) - 1;
constexpr uint64_t kInt20PrefixMsbMask = 0xf0;
constexpr uint64_t kInt20PrefixMsb     = 0xe0;
constexpr uint64_t kInt20Prefix        = kInt20PrefixMsb << 16;

// 7 bytes: 10<54 bits>
constexpr uint64_t kInt54Mask          = (uint64_t{1} << 54) - 1;
constexpr uint64_t kInt54PrefixMsbMask = 0xc0;
constexpr uint64_t kInt54PrefixMsb     = 0x80;
constexpr uint64_t kInt54Prefix        = kInt54PrefixMsb << 48;

// 1 byte: 1111<4 bits>
constexpr uint64_t kCodeMask   = 0x0f;
constexpr uint64_t kCodePrefix = 0xf0;
292 static void fb_compact_serialize_code(StringBuffer
& sb
,
293 FbCompactSerializeCode code
) {
294 assertx(code
== (code
& kCodeMask
));
295 uint8_t v
= (kCodePrefix
| code
);
296 sb
.append(reinterpret_cast<char*>(&v
), 1);
299 static void fb_compact_serialize_int64(StringBuffer
& sb
, int64_t val
) {
300 if (val
>= 0 && (uint64_t)val
<= kInt7Mask
) {
302 sb
.append(reinterpret_cast<char*>(&nval
), 1);
304 } else if (val
>= 0 && (uint64_t)val
<= kInt13Mask
) {
305 uint16_t nval
= htons(kInt13Prefix
| val
);
306 sb
.append(reinterpret_cast<char*>(&nval
), 2);
308 } else if (val
== (int64_t)(int16_t)val
) {
309 fb_compact_serialize_code(sb
, FB_CS_INT16
);
310 uint16_t nval
= htons(val
);
311 sb
.append(reinterpret_cast<char*>(&nval
), 2);
313 } else if (val
>= 0 && (uint64_t)val
<= kInt20Mask
) {
314 uint32_t nval
= htonl(kInt20Prefix
| val
);
315 // Skip most significant byte
316 sb
.append(reinterpret_cast<char*>(&nval
) + 1, 3);
318 } else if (val
== (int64_t)(int32_t)val
) {
319 fb_compact_serialize_code(sb
, FB_CS_INT32
);
320 uint32_t nval
= htonl(val
);
321 sb
.append(reinterpret_cast<char*>(&nval
), 4);
323 } else if (val
>= 0 && (uint64_t)val
<= kInt54Mask
) {
324 uint64_t nval
= htonll(kInt54Prefix
| val
);
325 // Skip most significant byte
326 sb
.append(reinterpret_cast<char*>(&nval
) + 1, 7);
329 fb_compact_serialize_code(sb
, FB_CS_INT64
);
330 uint64_t nval
= htonll(val
);
331 sb
.append(reinterpret_cast<char*>(&nval
), 8);
335 static void fb_compact_serialize_string(StringBuffer
& sb
, const String
& str
) {
336 int len
= str
.size();
338 fb_compact_serialize_code(sb
, FB_CS_STRING_0
);
341 fb_compact_serialize_code(sb
, FB_CS_STRING_1
);
343 fb_compact_serialize_code(sb
, FB_CS_STRING_N
);
344 fb_compact_serialize_int64(sb
, len
);
346 sb
.append(str
.data(), len
);
350 static bool fb_compact_serialize_is_list(const Array
& arr
, int64_t& index_limit
) {
351 index_limit
= arr
.size();
352 int64_t max_index
= 0;
353 for (ArrayIter
it(arr
); it
; ++it
) {
354 Variant key
= it
.first();
355 if (!key
.isNumeric()) {
358 int64_t index
= key
.toInt64();
359 if (index
< max_index
) {
362 if (index
> max_index
) {
367 if (max_index
>= arr
.size() * 2) {
368 // Might as well store it as a map
372 index_limit
= max_index
+ 1;
376 static int fb_compact_serialize_variant(
377 StringBuffer
& sd
, const Variant
& var
, int depth
);
379 static void fb_compact_serialize_array_as_list_map(
380 StringBuffer
& sb
, const Array
& arr
, int64_t index_limit
, int depth
) {
381 fb_compact_serialize_code(sb
, FB_CS_LIST_MAP
);
382 for (int64_t i
= 0; i
< index_limit
; ++i
) {
384 fb_compact_serialize_variant(sb
, arr
[i
], depth
+ 1);
386 fb_compact_serialize_code(sb
, FB_CS_SKIP
);
389 fb_compact_serialize_code(sb
, FB_CS_STOP
);
392 static void fb_compact_serialize_vec(
393 StringBuffer
& sb
, const Array
& arr
, int depth
) {
394 fb_compact_serialize_code(sb
, FB_CS_LIST_MAP
);
395 PackedArray::IterateV(
398 fb_compact_serialize_variant(sb
, VarNR(v
), depth
+ 1);
401 fb_compact_serialize_code(sb
, FB_CS_STOP
);
404 static void fb_compact_serialize_array_as_map(
405 StringBuffer
& sb
, const Array
& arr
, int depth
) {
406 fb_compact_serialize_code(sb
, FB_CS_MAP
);
409 [&](Cell k
, TypedValue v
) {
410 if (isStringType(k
.m_type
)) {
411 fb_compact_serialize_string(sb
, StrNR
{k
.m_data
.pstr
});
413 assertx(isIntType(k
.m_type
));
414 fb_compact_serialize_int64(sb
, k
.m_data
.num
);
416 fb_compact_serialize_variant(sb
, VarNR(v
), depth
+ 1);
419 fb_compact_serialize_code(sb
, FB_CS_STOP
);
422 static void fb_compact_serialize_keyset(
423 StringBuffer
& sb
, const Array
& arr
) {
424 fb_compact_serialize_code(sb
, FB_CS_MAP
);
426 SetArray::asSet(arr
.get()),
428 if (isStringType(v
.m_type
)) {
429 fb_compact_serialize_string(sb
, StrNR
{v
.m_data
.pstr
});
430 fb_compact_serialize_string(sb
, StrNR
{v
.m_data
.pstr
});
432 assertx(v
.m_type
== KindOfInt64
);
433 fb_compact_serialize_int64(sb
, v
.m_data
.num
);
434 fb_compact_serialize_int64(sb
, v
.m_data
.num
);
438 fb_compact_serialize_code(sb
, FB_CS_STOP
);
441 static int fb_compact_serialize_variant(
442 StringBuffer
& sb
, const Variant
& var
, int depth
) {
447 switch (var
.getType()) {
450 fb_compact_serialize_code(sb
, FB_CS_NULL
);
455 fb_compact_serialize_code(sb
, FB_CS_TRUE
);
457 fb_compact_serialize_code(sb
, FB_CS_FALSE
);
462 fb_compact_serialize_int64(sb
, var
.toInt64());
466 fb_compact_serialize_code(sb
, FB_CS_DOUBLE
);
467 double d
= var
.toDouble();
468 sb
.append(reinterpret_cast<char*>(&d
), 8);
472 case KindOfPersistentString
:
476 fb_compact_serialize_string(sb
, var
.toString());
479 case KindOfPersistentVec
:
481 Array arr
= var
.toArray();
482 assertx(arr
->isVecArray());
483 fb_compact_serialize_vec(sb
, std::move(arr
), depth
);
487 case KindOfPersistentDict
:
489 Array arr
= var
.toArray();
490 assertx(arr
->isDict());
491 fb_compact_serialize_array_as_map(sb
, std::move(arr
), depth
);
495 case KindOfPersistentKeyset
:
497 Array arr
= var
.toArray();
498 assertx(arr
->isKeyset());
499 fb_compact_serialize_keyset(sb
, std::move(arr
));
503 case KindOfPersistentShape
:
504 case KindOfShape
: { // TODO(T31134050)
505 Array arr
= var
.toArray();
506 assertx(arr
->isDictOrDArray());
507 fb_compact_serialize_array_as_map(sb
, std::move(arr
), depth
);
511 case KindOfPersistentArray
:
513 Array arr
= var
.toArray();
514 assertx(arr
->isPHPArray());
516 if (fb_compact_serialize_is_list(arr
, index_limit
)) {
517 fb_compact_serialize_array_as_list_map(
518 sb
, std::move(arr
), index_limit
, depth
);
520 fb_compact_serialize_array_as_map(sb
, std::move(arr
), depth
);
525 case KindOfClsMeth
: {
526 Array arr
= var
.toArray();
527 if (RuntimeOption::EvalHackArrDVArrs
) {
528 assertx(arr
->isVecArray());
529 fb_compact_serialize_vec(sb
, std::move(arr
), depth
);
531 assertx(arr
->isPHPArray());
533 fb_compact_serialize_is_list(arr
, index_limit
);
534 fb_compact_serialize_array_as_list_map(
535 sb
, std::move(arr
), index_limit
, depth
);
543 fb_compact_serialize_code(sb
, FB_CS_NULL
);
545 "fb_compact_serialize(): unable to serialize "
546 "object/resource/ref/func/class"
554 String
fb_compact_serialize(const Variant
& thing
) {
556 * If thing is a single int value [0, 127] normally we would serialize
557 * it as a single byte (7 bit unsigned int).
559 * However, we want highest bit of the first byte to always be set so
560 * that we can tell if the string is fb_serialize'd or fb_compact_serialize'd.
562 * So we force to serialize it as 13 bit unsigned int instead.
564 if (thing
.getType() == KindOfInt64
) {
565 int64_t val
= thing
.toInt64();
566 if (val
>= 0 && (uint64_t)val
<= kInt7Mask
) {
567 String
s(2, ReserveString
);
568 *(uint16_t*)(s
.mutableData()) = (uint16_t)htons(kInt13Prefix
| val
);
575 if (fb_compact_serialize_variant(sb
, thing
, 0)) {
582 Variant
HHVM_FUNCTION(fb_compact_serialize
, const Variant
& thing
) {
583 return fb_compact_serialize(thing
);
/*
 * Check if there are enough bytes left in the buffer.
 *
 *   bytes - number of bytes about to be read
 *   pos   - current read offset into the buffer
 *   num   - total size of the buffer
 *
 * Makes the enclosing function return FB_UNSERIALIZE_UNEXPECTED_END when the
 * read would not fit. The comparison is done in 64 bits and negative byte
 * counts are rejected: `bytes` may be an int64 length decoded from untrusted
 * input, and truncating it to int would let out-of-range lengths pass.
 */
#define CHECK_ENOUGH(bytes, pos, num) do {                                   \
    const int64_t checkEnoughWanted_ = (int64_t)(bytes);                     \
    const int64_t checkEnoughLeft_ = (int64_t)(num) - (int64_t)(pos);        \
    if (checkEnoughWanted_ < 0 || checkEnoughWanted_ > checkEnoughLeft_) {   \
      return FB_UNSERIALIZE_UNEXPECTED_END;                                  \
    }                                                                        \
  } while (0)
594 int fb_compact_unserialize_int64_from_buffer(
595 int64_t& out
, const char* buf
, int n
, int& p
) {
597 CHECK_ENOUGH(1, p
, n
);
598 uint64_t first
= (unsigned char)buf
[p
];
599 if ((first
& ~kInt7Mask
) == kInt7Prefix
) {
601 out
= first
& kInt7Mask
;
603 } else if ((first
& kInt13PrefixMsbMask
) == kInt13PrefixMsb
) {
604 CHECK_ENOUGH(2, p
, n
);
605 uint16_t val
= (uint16_t)ntohs(*reinterpret_cast<const uint16_t*>(buf
+ p
));
607 out
= val
& kInt13Mask
;
609 } else if (first
== (kCodePrefix
| FB_CS_INT16
)) {
611 CHECK_ENOUGH(2, p
, n
);
612 int16_t val
= (int16_t)ntohs(*reinterpret_cast<const int16_t*>(buf
+ p
));
616 } else if ((first
& kInt20PrefixMsbMask
) == kInt20PrefixMsb
) {
617 CHECK_ENOUGH(3, p
, n
);
619 memcpy(&b
, buf
+ p
, 3);
620 uint32_t val
= ntohl(b
);
622 out
= (val
>> 8) & kInt20Mask
;
624 } else if (first
== (kCodePrefix
| FB_CS_INT32
)) {
626 CHECK_ENOUGH(4, p
, n
);
627 int32_t val
= (int32_t)ntohl(*reinterpret_cast<const int32_t*>(buf
+ p
));
631 } else if ((first
& kInt54PrefixMsbMask
) == kInt54PrefixMsb
) {
632 CHECK_ENOUGH(7, p
, n
);
634 memcpy(&b
, buf
+ p
, 7);
635 uint64_t val
= ntohll(b
);
637 out
= (val
>> 8) & kInt54Mask
;
639 } else if (first
== (kCodePrefix
| FB_CS_INT64
)) {
641 CHECK_ENOUGH(8, p
, n
);
642 int64_t val
= (int64_t)ntohll(*reinterpret_cast<const int64_t*>(buf
+ p
));
647 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE
;
653 const StaticString
s_empty("");
655 int fb_compact_unserialize_from_buffer(
656 Variant
& out
, const char* buf
, int n
, int& p
) {
658 CHECK_ENOUGH(1, p
, n
);
659 int code
= (unsigned char)buf
[p
];
660 if ((code
& ~kCodeMask
) != kCodePrefix
||
661 (code
& kCodeMask
) == FB_CS_INT16
||
662 (code
& kCodeMask
) == FB_CS_INT32
||
663 (code
& kCodeMask
) == FB_CS_INT64
) {
666 int err
= fb_compact_unserialize_int64_from_buffer(val
, buf
, n
, p
);
690 CHECK_ENOUGH(8, p
, n
);
691 double d
= *reinterpret_cast<const double*>(buf
+ p
);
707 if (code
== FB_CS_STRING_N
) {
708 int err
= fb_compact_unserialize_int64_from_buffer(len
, buf
, n
, p
);
714 CHECK_ENOUGH(len
, p
, n
);
715 out
= Variant::attach(StringData::Make(buf
+ p
, len
, CopyString
));
722 Array arr
= Array::Create();
724 bool should_log_skip
=
725 RuntimeOption::EvalHackArrCompatCompactSerializeNotices
;
726 while (p
< n
&& buf
[p
] != (char)(kCodePrefix
| FB_CS_STOP
)) {
727 if (buf
[p
] == (char)(kCodePrefix
| FB_CS_SKIP
)) {
728 if (UNLIKELY(should_log_skip
)) {
729 should_log_skip
= false;
730 raise_hackarr_compat_notice(
731 "fb_compact_unserialize(): vector cannot contain skip");
738 int err
= fb_compact_unserialize_from_buffer(value
, buf
, n
, p
);
746 CHECK_ENOUGH(1, p
, n
);
755 Array arr
= Array::CreateDArray();
757 while (p
< n
&& buf
[p
] != (char)(kCodePrefix
| FB_CS_STOP
)) {
758 if (buf
[p
] == (char)(kCodePrefix
| FB_CS_SKIP
)) {
763 int err
= fb_compact_unserialize_from_buffer(value
, buf
, n
, p
);
772 CHECK_ENOUGH(1, p
, n
);
781 Array arr
= Array::CreateDArray();
782 while (p
< n
&& buf
[p
] != (char)(kCodePrefix
| FB_CS_STOP
)) {
784 int err
= fb_compact_unserialize_from_buffer(key
, buf
, n
, p
);
789 err
= fb_compact_unserialize_from_buffer(value
, buf
, n
, p
);
793 if (key
.getType() == KindOfInt64
) {
794 arr
.set(key
.toInt64(), value
);
795 } else if (key
.getType() == KindOfString
||
796 key
.getType() == KindOfPersistentString
) {
797 const auto arrkey
= arr
.convertKey
<IntishCast::Cast
>(key
);
798 arr
.set(arrkey
, *value
.asTypedValue());
800 return FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE
;
805 CHECK_ENOUGH(1, p
, n
);
813 return FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE
;
819 Variant
fb_compact_unserialize(const char* str
, int len
,
821 VRefParam errcode
/* = uninit_variant */) {
825 int err
= fb_compact_unserialize_from_buffer(ret
, str
, len
, p
);
827 success
.assignIfRef(false);
828 errcode
.assignIfRef(err
);
831 success
.assignIfRef(true);
832 errcode
.assignIfRef(init_null());
836 Variant
HHVM_FUNCTION(fb_compact_unserialize
,
837 const Variant
& thing
, VRefParam success
,
838 VRefParam errcode
/* = uninit_variant */) {
839 if (!thing
.isString()) {
840 success
.assignIfRef(false);
841 errcode
.assignIfRef(FB_UNSERIALIZE_NONSTRING_VALUE
);
845 String s
= thing
.toString();
846 return fb_compact_unserialize(s
.data(), s
.size(), ref(success
),
850 ///////////////////////////////////////////////////////////////////////////////
852 bool HHVM_FUNCTION(fb_utf8ize
, VRefParam input
) {
853 String s
= input
.toString();
854 const char* const srcBuf
= s
.data();
855 int32_t srcLenBytes
= s
.size();
857 if (s
.size() < 0 || s
.size() > INT_MAX
) {
858 return false; // Too long.
861 // Preflight to avoid allocation if the entire input is valid.
863 for (srcPosBytes
= 0; srcPosBytes
< srcLenBytes
; /* U8_NEXT increments */) {
864 // This is lame, but gcc doesn't optimize U8_NEXT very well
865 if (srcBuf
[srcPosBytes
] != 0 && !(srcBuf
[srcPosBytes
] & 0x80)) {
866 srcPosBytes
++; // U8_NEXT would increment this
869 UChar32 curCodePoint
;
870 // U8_NEXT() always advances srcPosBytes; save in case curCodePoint invalid
871 int32_t savedSrcPosBytes
= srcPosBytes
;
872 U8_NEXT(srcBuf
, srcPosBytes
, srcLenBytes
, curCodePoint
);
873 if (curCodePoint
<= 0) {
874 // curCodePoint invalid; back up so we'll fix it in the loop below.
875 srcPosBytes
= savedSrcPosBytes
;
880 if (srcPosBytes
== srcLenBytes
) {
885 // There are invalid bytes. Allocate memory, then copy the input, replacing
886 // invalid sequences with either the substitution character or nothing,
887 // depending on the value of RuntimeOption::Utf8izeReplace.
889 // Worst case, every remaining byte is invalid, taking a 3-byte substitution.
890 int32_t bytesRemaining
= srcLenBytes
- srcPosBytes
;
891 uint64_t dstMaxLenBytes
= srcPosBytes
+ (RuntimeOption::Utf8izeReplace
?
892 bytesRemaining
* U8_LENGTH(SUBSTITUTION_CHARACTER
) :
894 if (dstMaxLenBytes
> INT_MAX
) {
895 return false; // Too long.
897 String
dstStr(dstMaxLenBytes
, ReserveString
);
898 char *dstBuf
= dstStr
.mutableData();
900 // Copy valid bytes found so far as one solid block.
901 memcpy(dstBuf
, srcBuf
, srcPosBytes
);
903 // Iterate through the remaining bytes.
904 int32_t dstPosBytes
= srcPosBytes
; // already copied srcPosBytes
905 for (/* already init'd */; srcPosBytes
< srcLenBytes
; /* see U8_NEXT */) {
906 UChar32 curCodePoint
;
907 // This is lame, but gcc doesn't optimize U8_NEXT very well
908 if (srcBuf
[srcPosBytes
] != 0 && !(srcBuf
[srcPosBytes
] & 0x80)) {
909 curCodePoint
= srcBuf
[srcPosBytes
++]; // U8_NEXT would increment
911 U8_NEXT(srcBuf
, srcPosBytes
, srcLenBytes
, curCodePoint
);
913 if (curCodePoint
<= 0) {
914 // Invalid UTF-8 sequence.
915 // N.B. We consider a null byte an invalid sequence.
916 if (!RuntimeOption::Utf8izeReplace
) {
917 continue; // Omit invalid sequence
919 curCodePoint
= SUBSTITUTION_CHARACTER
; // Replace invalid sequences
921 // We know that resultBuffer > total possible length.
922 U8_APPEND_UNSAFE(dstBuf
, dstPosBytes
, curCodePoint
);
924 assertx(dstPosBytes
<= dstMaxLenBytes
);
925 input
.assignIfRef(dstStr
.shrink(dstPosBytes
));
930 * Private utf8_strlen implementation.
932 * Returns count of code points in input, substituting 1 code point per invalid
935 * deprecated=true: instead return byte count on invalid UTF-8 sequence.
937 static int fb_utf8_strlen_impl(const String
& input
, bool deprecated
) {
938 // Count, don't modify.
939 int32_t sourceLength
= input
.size();
940 const char* const sourceBuffer
= input
.data();
941 int64_t num_code_points
= 0;
943 for (int32_t sourceOffset
= 0; sourceOffset
< sourceLength
; ) {
944 UChar32 sourceCodePoint
;
945 // U8_NEXT() is guaranteed to advance sourceOffset by 1-4 each time it's
947 U8_NEXT(sourceBuffer
, sourceOffset
, sourceLength
, sourceCodePoint
);
948 if (deprecated
&& sourceCodePoint
< 0) {
949 return sourceLength
; // return byte count on invalid sequence
953 return num_code_points
;
956 int64_t HHVM_FUNCTION(fb_utf8_strlen
, const String
& input
) {
957 return fb_utf8_strlen_impl(input
, /* deprecated */ false);
960 int64_t HHVM_FUNCTION(fb_utf8_strlen_deprecated
, const String
& input
) {
961 return fb_utf8_strlen_impl(input
, /* deprecated */ true);
965 * Private helper; requires non-negative firstCodePoint and desiredCodePoints.
967 static String
fb_utf8_substr_simple(const String
& str
,
968 int32_t firstCodePoint
,
969 int32_t numDesiredCodePoints
) {
970 const char* const srcBuf
= str
.data();
971 int32_t srcLenBytes
= str
.size(); // May truncate; checked before use below.
973 assertx(firstCodePoint
>= 0); // Wrapper fixes up negative starting positions.
974 assertx(numDesiredCodePoints
> 0); // Wrapper fixes up negative/zero length.
975 if (str
.size() <= 0 ||
976 str
.size() > INT_MAX
||
977 firstCodePoint
>= srcLenBytes
) {
978 return empty_string();
981 // Cannot be more code points than bytes in input. This typically reduces
982 // the INT_MAX default value to something more reasonable.
983 numDesiredCodePoints
= std::min(numDesiredCodePoints
,
984 srcLenBytes
- firstCodePoint
);
986 // Pre-allocate the result.
987 // the worst case can come from one of two sources:
988 // - every code point could be the substitution char (3 bytes)
989 // giving us numDesiredCodePoints * 3
990 // - every code point could be 4 bytes long, giving us
991 // numDesiredCodePoints * 4 - but capped by the length of the input
992 uint64_t dstMaxLenBytes
=
993 std::min((uint64_t)numDesiredCodePoints
* 4,
994 (uint64_t)srcLenBytes
- firstCodePoint
);
995 dstMaxLenBytes
= std::max(dstMaxLenBytes
,
996 (uint64_t)numDesiredCodePoints
*
997 U8_LENGTH(SUBSTITUTION_CHARACTER
));
998 if (dstMaxLenBytes
> INT_MAX
) {
999 return empty_string(); // Too long.
1001 String
dstStr(dstMaxLenBytes
, ReserveString
);
1002 char* dstBuf
= dstStr
.mutableData();
1003 int32_t dstPosBytes
= 0;
1005 // Iterate through src's codepoints; srcPosBytes is incremented by U8_NEXT.
1006 for (int32_t srcPosBytes
= 0, srcPosCodePoints
= 0;
1007 srcPosBytes
< srcLenBytes
&& // more available
1008 srcPosCodePoints
< firstCodePoint
+ numDesiredCodePoints
; // want more
1009 srcPosCodePoints
++) {
1011 // U8_NEXT() advances sourceBytePos by 1-4 each time it's invoked.
1012 UChar32 curCodePoint
;
1013 U8_NEXT(srcBuf
, srcPosBytes
, srcLenBytes
, curCodePoint
);
1015 if (srcPosCodePoints
>= firstCodePoint
) {
1016 // Copy this code point into the result.
1017 if (curCodePoint
< 0) {
1018 curCodePoint
= SUBSTITUTION_CHARACTER
; // replace invalid sequences
1020 // We know that resultBuffer > total possible length.
1021 // U8_APPEND_UNSAFE updates dstPosBytes.
1022 U8_APPEND_UNSAFE(dstBuf
, dstPosBytes
, curCodePoint
);
1026 assertx(dstPosBytes
<= dstMaxLenBytes
);
1027 if (dstPosBytes
> 0) {
1028 dstStr
.shrink(dstPosBytes
);
1031 return empty_string();
1034 String
HHVM_FUNCTION(fb_utf8_substr
, const String
& str
, int64_t start
,
1035 int64_t length
/* = INT_MAX */) {
1036 if (length
> INT_MAX
) {
1039 // For negative start or length, calculate start and length values
1040 // based on total code points.
1041 if (start
< 0 || length
< 0) {
1042 // Get number of code points assuming we substitute invalid sequences.
1043 Variant utf8StrlenResult
= HHVM_FN(fb_utf8_strlen
)(str
);
1044 int32_t sourceNumCodePoints
= utf8StrlenResult
.toInt32();
1047 // Negative means first character is start'th code point from end.
1048 // e.g., -1 means start with the last code point.
1049 start
= sourceNumCodePoints
+ start
; // adding negative start
1052 // Negative means omit last abs(length) code points.
1053 length
= sourceNumCodePoints
- start
+ length
; // adding negative length
1056 if (start
< 0 || length
<= 0) {
1057 return empty_string(); // Empty result
1060 return fb_utf8_substr_simple(str
, start
, length
);
1063 ///////////////////////////////////////////////////////////////////////////////
1065 bool HHVM_FUNCTION(fb_intercept
, const String
& name
, const Variant
& handler
,
1066 const Variant
& data
/* = uninit_variant */) {
1067 return register_intercept(name
, handler
, data
, true);
1070 bool HHVM_FUNCTION(fb_rename_function
, const String
& orig_func_name
,
1071 const String
& new_func_name
) {
1072 if (orig_func_name
.empty() || new_func_name
.empty() ||
1073 orig_func_name
.get()->isame(new_func_name
.get())) {
1074 throw_invalid_argument("unable to rename %s", orig_func_name
.data());
1078 if (!function_exists(orig_func_name
)) {
1079 raise_warning("fb_rename_function(%s, %s) failed: %s does not exist!",
1080 orig_func_name
.data(), new_func_name
.data(),
1081 orig_func_name
.data());
1085 if (function_exists(new_func_name
)) {
1086 if (new_func_name
.data()[0] != '1') {
1087 raise_warning("fb_rename_function(%s, %s) failed: %s already exists!",
1088 orig_func_name
.data(), new_func_name
.data(),
1089 new_func_name
.data());
1094 rename_function(orig_func_name
, new_func_name
);
1098 ///////////////////////////////////////////////////////////////////////////////
1100 Variant
HHVM_FUNCTION(fb_get_code_coverage
, bool flush
) {
1101 RequestInfo
*ti
= RequestInfo::s_requestInfo
.getNoCheck();
1102 if (ti
->m_reqInjectionData
.getCoverage()) {
1103 Array ret
= ti
->m_coverage
->Report();
1105 ti
->m_coverage
->Reset();
1112 void HHVM_FUNCTION(fb_enable_code_coverage
) {
1113 RequestInfo
*ti
= RequestInfo::s_requestInfo
.getNoCheck();
1114 ti
->m_coverage
->Reset();
1115 ti
->m_reqInjectionData
.setCoverage(true);
1116 if (g_context
->isNested()) {
1117 raise_notice("Calling fb_enable_code_coverage from a nested "
1118 "VM instance may cause unpredicable results");
1120 throw VMSwitchModeBuiltin();
1123 Array
disable_code_coverage_helper(bool report_frequency
) {
1124 RequestInfo
*ti
= RequestInfo::s_requestInfo
.getNoCheck();
1125 ti
->m_reqInjectionData
.setCoverage(false);
1126 auto ret
= ti
->m_coverage
->Report(report_frequency
);
1127 ti
->m_coverage
->Reset();
1131 Array
HHVM_FUNCTION(fb_disable_code_coverage
) {
1132 return disable_code_coverage_helper(/* report frequency */ false);
1135 Array
HHVM_FUNCTION(HH_disable_code_coverage_with_frequency
) {
1136 return disable_code_coverage_helper(/* report frequency */ true);
1139 ///////////////////////////////////////////////////////////////////////////////
1141 bool HHVM_FUNCTION(fb_output_compression
, bool new_value
) {
1142 Transport
*transport
= g_context
->getTransport();
1144 bool rv
= transport
->isCompressionEnabled();
1146 transport
->enableCompression();
1148 transport
->disableCompression();
1155 void HHVM_FUNCTION(fb_set_exit_callback
, const Variant
& function
) {
1156 g_context
->setExitCallback(function
);
1160 s_flush_stats("flush_stats"),
1161 s_chunk_stats("chunk_stats"),
1166 int64_t HHVM_FUNCTION(fb_get_last_flush_size
) {
1167 Transport
*transport
= g_context
->getTransport();
1168 return transport
? transport
->getLastChunkSentSize() : 0;
1171 extern Array
stat_impl(struct stat
*); // ext_file.cpp
1173 template<class Function
>
1174 static Variant
do_lazy_stat(Function dostat
, const String
& filename
) {
1176 if (dostat(File::TranslatePathWithFileCache(filename
).c_str(), &sb
)) {
1177 Logger::Verbose("%s/%d: %s", __FUNCTION__
, __LINE__
,
1178 folly::errnoStr(errno
).c_str());
1181 return stat_impl(&sb
);
1184 Variant
HHVM_FUNCTION(fb_lazy_lstat
, const String
& filename
) {
1185 if (!FileUtil::checkPathAndWarn(filename
, __FUNCTION__
+ 2, 1)) {
1188 return do_lazy_stat(StatCache::lstat
, filename
);
1191 Variant
HHVM_FUNCTION(fb_lazy_realpath
, const String
& filename
) {
1192 if (!FileUtil::checkPathAndWarn(filename
, __FUNCTION__
+ 2, 1)) {
1196 return StatCache::realpath(filename
.c_str());
1199 int64_t HHVM_FUNCTION(HH_non_crypto_md5_upper
, StringArg str
) {
1200 Md5Digest
md5(str
.get()->data(), str
.get()->size());
1202 // Work around "strict aliasing" with memcpy
1203 memcpy(&pre_decode
, md5
.digest
, sizeof(pre_decode
));
1204 // When PHP/Hack users decode MD5 hex, they treat it as big endian.
1205 // Replicate that here.
1206 return folly::Endian::big(pre_decode
);
1209 int64_t HHVM_FUNCTION(HH_non_crypto_md5_lower
, StringArg str
) {
1210 Md5Digest
md5(str
.get()->data(), str
.get()->size());
1212 // Work around "strict aliasing" with memcpy
1213 memcpy(&pre_decode
, md5
.digest
+ 8, sizeof(pre_decode
));
1214 // When PHP/Hack users decode MD5 hex, they treat it as big endian.
1215 // Replicate that here.
1216 return folly::Endian::big(pre_decode
);
1219 ///////////////////////////////////////////////////////////////////////////////
1223 // TODO(8117903): Unused; remove after updating www side.
1226 ///////////////////////////////////////////////////////////////////////////////
1228 struct FBExtension
: Extension
{
1229 FBExtension(): Extension("fb", "1.0.0") {}
1231 void moduleInit() override
{
1232 HHVM_RC_BOOL_SAME(HHVM_FACEBOOK
);
1233 HHVM_RC_BOOL(HHVM_NO_DESTRUCTORS
, one_bit_refcount
);
1234 HHVM_RC_INT_SAME(FB_UNSERIALIZE_NONSTRING_VALUE
);
1235 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_END
);
1236 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNRECOGNIZED_OBJECT_TYPE
);
1237 HHVM_RC_INT_SAME(FB_UNSERIALIZE_UNEXPECTED_ARRAY_KEY_TYPE
);
1239 HHVM_RC_INT(FB_SERIALIZE_HACK_ARRAYS
, k_FB_SERIALIZE_HACK_ARRAYS
);
1241 HHVM_FE(fb_serialize
);
1242 HHVM_FE(fb_unserialize
);
1243 HHVM_FE(fb_compact_serialize
);
1244 HHVM_FE(fb_compact_unserialize
);
1245 HHVM_FE(fb_utf8ize
);
1246 HHVM_FE(fb_utf8_strlen
);
1247 HHVM_FE(fb_utf8_strlen_deprecated
);
1248 HHVM_FE(fb_utf8_substr
);
1249 HHVM_FE(fb_intercept
);
1250 HHVM_FE(fb_rename_function
);
1251 HHVM_FE(fb_get_code_coverage
);
1252 HHVM_FE(fb_enable_code_coverage
);
1253 HHVM_FE(fb_disable_code_coverage
);
1254 HHVM_FE(fb_output_compression
);
1255 HHVM_FE(fb_set_exit_callback
);
1256 HHVM_FE(fb_get_last_flush_size
);
1257 HHVM_FE(fb_lazy_lstat
);
1258 HHVM_FE(fb_lazy_realpath
);
1260 HHVM_FALIAS(HH
\\disable_code_coverage_with_frequency
,
1261 HH_disable_code_coverage_with_frequency
);
1262 HHVM_FALIAS(HH
\\non_crypto_md5_upper
, HH_non_crypto_md5_upper
);
1263 HHVM_FALIAS(HH
\\non_crypto_md5_lower
, HH_non_crypto_md5_lower
);
1269 ///////////////////////////////////////////////////////////////////////////////