2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_STRING_DATA_H_
18 #define incl_HPHP_STRING_DATA_H_
20 #include <folly/Range.h>
22 #include "hphp/util/alloc.h"
23 #include "hphp/util/bstring.h"
24 #include "hphp/util/hash.h"
25 #include "hphp/util/word-mem.h"
27 #include "hphp/runtime/base/countable.h"
28 #include "hphp/runtime/base/datatype.h"
29 #include "hphp/runtime/base/exceptions.h"
30 #include "hphp/runtime/base/memory-manager.h"
31 #include "hphp/runtime/base/string-data-macros.h"
35 //////////////////////////////////////////////////////////////////////
41 //////////////////////////////////////////////////////////////////////
// Tag type selecting the "attach" flavour of construction: the passed-in
// string is copied and its buffer is freed immediately.
enum AttachStringMode {
  AttachString
};
// Tag type selecting the copying flavour of construction: the const char*
// refers to client-owned memory, and StringData copies it at construct-time
// using req::malloc. This is only ok when the StringData itself was
// request-allocated.
enum CopyStringMode {
  CopyString
};
52 * Runtime representation of PHP strings.
54 * StringData's have two different modes, not all of which we want to
55 * keep forever. The main mode is Flat, which means StringData is a
56 * header in a contiguous allocation with the character array for the
57 * string. The other (Proxy) is for APCString-backed StringDatas.
59 * StringDatas can also be allocated in multiple ways. Normally, they
60 * are created through one of the Make overloads, which drops them in
61 * the request-local heap. They can also be low-malloced (for static
62 * strings), or malloc'd (MakeMalloc) for APC shared or uncounted strings.
64 * Here's a breakdown of string modes, and which configurations are
65 * allowed in which allocation mode:
67 * | Static | Malloced | Normal (request local)
68 * +--------+----------+-----------------------
72 struct StringData final
: MaybeCountable
,
73 type_scan::MarkCountable
<StringData
> {
74 friend struct APCString
;
75 friend StringData
* allocFlat(size_t len
);
78 * Max length of a string, not counting the terminal 0.
80 * This is smaller than MAX_INT, and it plus StringData overhead should
81 * exactly equal a size class.
84 static constexpr uint32_t MaxSize
= 0x80000000U
- 16 - 1;
86 static constexpr uint32_t MaxSize
= 0x80000000U
- 24 - 1;
90 * Creates an empty request-local string with an unspecified amount of
91 * reserved space. Ref-count is pre-initialized to 1.
93 static StringData
* Make();
96 * Constructors that copy the string memory into this StringData, for
97 * request-local strings. Ref-count is pre-initialized to 1.
99 * Most strings are created this way.
101 static StringData
* Make(folly::StringPiece
);
103 static StringData
* Make(const char* data
, CopyStringMode
);
104 static StringData
* Make(const char* data
, size_t len
, CopyStringMode
);
105 static StringData
* Make(const StringData
* s
, CopyStringMode
);
106 static StringData
* Make(folly::StringPiece r1
, CopyStringMode
);
109 * Attach constructors for request-local strings.
111 * These do the same thing as the above CopyStringMode constructors, except
112 * that they also free `data'. Ref-count is pre-initialized to 1.
114 static StringData
* Make(char* data
, AttachStringMode
);
115 static StringData
* Make(char* data
, size_t len
, AttachStringMode
);
118 * Create a new request-local string by concatenating two existing
119 * strings. Ref-count is pre-initialized to 1.
121 static StringData
* Make(const StringData
* s1
, const StringData
* s2
);
122 static StringData
* Make(const StringData
* s1
, folly::StringPiece s2
);
123 static StringData
* Make(const StringData
* s1
, const char* lit2
);
124 static StringData
* Make(folly::StringPiece s1
, const char* lit2
);
125 static StringData
* Make(folly::StringPiece s1
, folly::StringPiece s2
);
126 static StringData
* Make(folly::StringPiece s1
, folly::StringPiece s2
,
127 folly::StringPiece s3
);
128 static StringData
* Make(folly::StringPiece s1
, folly::StringPiece s2
,
129 folly::StringPiece s3
, folly::StringPiece s4
);
132 * Create a new request-local empty string big enough to hold strings of
133 * length `reserve' (not counting the \0 terminator). Ref-count is
134 * pre-initialized to 1.
136 static StringData
* Make(size_t reserve
);
139 * Create a request-local "Proxy" StringData that wraps an APCString.
140 * Ref-count is pre-initialized to 1.
142 static StringData
* MakeProxy(const APCString
* apcstr
);
145 * Allocate a string with malloc, using the low-memory allocator if
146 * jemalloc is available, and setting it as a static string.
148 * This api is only for the static-string-table.cpp. The returned
149 * StringData is not expected to be reference counted, and must be
150 * deallocated using destructStatic.
152 static StringData
* MakeStatic(folly::StringPiece
);
155 * Same as MakeStatic but the string allocated will *not* be in the static
156 * string table, will not be in low-memory, and should be deleted using
157 * destructUncounted once the root goes out of scope.
159 static StringData
* MakeUncounted(folly::StringPiece
);
162 * Same as MakeStatic but initializes the empty string in aligned storage.
163 * This should be called by the static string table initialization code.
165 static StringData
* MakeEmpty();
168 * Offset accessors for the JIT compiler.
171 static constexpr ptrdiff_t dataOff() { return offsetof(StringData
, m_data
); }
173 static constexpr ptrdiff_t sizeOff() { return offsetof(StringData
, m_len
); }
174 static constexpr ptrdiff_t hashOff() { return offsetof(StringData
, m_hash
); }
177 * Proxy StringData's have a sweep list running through them for
178 * decrefing the APCString they are fronting. This function
179 * must be called at request cleanup time to handle this.
181 static unsigned sweepAll();
184 * Called to return a StringData to the request allocator. This is
185 * normally called when the reference count goes to zero (e.g. with
186 * a helper like decRefStr).
188 void release() noexcept
;
189 size_t heapSize() const;
192 * StringData objects allocated with MakeStatic should be freed
193 * using this function.
195 void destructStatic();
198 * StringData objects allocated with MakeUncounted should be freed
199 * using this function.
201 void destructUncounted();
204 * Reference-counting related.
206 ALWAYS_INLINE
void decRefAndRelease() {
207 assert(kindIsValid());
208 if (decReleaseCheck()) release();
211 bool kindIsValid() const { return m_kind
== HeaderKind::String
; }
214 * Append the supplied range to this string. If there is not sufficient
215 * capacity in this string to contain the range, a new string may be
216 * returned. The new string's reference count will be pre-initialized to 1.
218 * Pre: !hasMultipleRefs()
219 * Pre: the string is request-local
221 StringData
* append(folly::StringPiece r
);
222 StringData
* append(folly::StringPiece r1
, folly::StringPiece r2
);
223 StringData
* append(folly::StringPiece r1
,
224 folly::StringPiece r2
,
225 folly::StringPiece r3
);
228 * Reserve space for a string of length `maxLen' (not counting null
231 * May not be called for strings created with MakeUncounted or
234 * Returns: possibly a new StringData, if we had to reallocate. The new
235 * string's reference count will be pre-initialized to 1.
237 StringData
* reserve(size_t maxLen
);
240 * Shrink a string down to length `len` (not counting null terminator).
242 * May not be called for strings created with MakeUncounted or
245 * Returns: possibly a new StringData, if we decided to reallocate. The new
246 * string's reference count is pre-initialized to 1. shrinkImpl
247 * always returns a new StringData.
249 StringData
* shrink(size_t len
);
250 StringData
* shrinkImpl(size_t len
);
253 * Returns a slice with extents sized to the *string* that this
254 * StringData wraps. This range does not include a null terminator.
256 * Note: please do not add new code that assumes the range does
257 * include a null-terminator if possible. (We would like to make
258 * this unnecessary eventually.)
260 folly::StringPiece
slice() const;
263 * Returns a mutable slice with extents sized to the *buffer* this
264 * StringData wraps, not the string, minus space for an implicit
267 * Note: please do not introduce new uses of this API that write
268 * nulls 1 byte past slice.len---we want to weed those out.
270 folly::MutableStringPiece
bufferSlice();
273 * If external users of this object want to modify it (e.g. through
274 * bufferSlice or mutableData()), they are responsible for either
275 * calling setSize() if the mutation changed the size of the string,
276 * or invalidateHash() if not.
278 * Pre: !hasMultipleRefs()
280 void invalidateHash();
281 void setSize(int len
);
284 * StringData should not generally be allocated on the stack,
285 * because references to it could escape. This function is for
286 * debugging: it asserts that the address of this doesn't point into
289 void checkStack() const;
292 * Access to the string's data as a character array.
294 * Please try to prefer slice() in new code, instead of assuming
295 * this is null terminated.
297 const char* data() const;
300 * Mutable version of data().
302 char* mutableData() const;
305 * Accessor for the length of a string.
307 * Note: size() returns a signed int for historical reasons. It is
308 * guaranteed to be in the range (0 <= size() <= MaxSize)
313 * Returns: size() == 0
318 * Return the capacity of this string's buffer, not including the space
319 * for the null terminator. Always 0 for static/uncounted strings.
321 uint32_t capacity() const;
324 * Simultaneously query whether this string is numeric, and pull out
325 * the numeric value of the string (as either an int or a double).
327 * The allow_errors flag is a boolean that does something currently
330 * If overflow is set its value is initialized to either zero to
331 * indicate that no overflow occurred or 1/-1 to indicate the direction
334 * Returns: KindOfNull, KindOfInt64 or KindOfDouble. The int64_t or
335 * double out reference params are populated in the latter two cases
336 * with the numeric value of the string. The KindOfNull case
337 * indicates the string is not numeric.
339 DataType
isNumericWithVal(int64_t&, double&, int allowErrors
,
340 int* overflow
= nullptr) const;
343 * Returns true if this string is numeric.
345 * In effect: isNumericWithVal(i, d, false) != KindOfNull
347 bool isNumeric() const;
350 * Returns whether this string is numeric and an integer.
352 * In effect: isNumericWithVal(i, d, false) == KindOfInt64
354 bool isInteger() const;
357 * Returns true if this string is "strictly" an integer in the sense
358 * of is_strictly_integer from util/hash.h, and if so provides the
359 * integer value in res.
361 bool isStrictlyInteger(int64_t& res
) const;
364 * Returns whether this string contains a single character '0'.
369 * Change the character at offset `offset' to `c'.
371 * May return a reallocated StringData* if this string was a shared
372 * string. The new string's reference count is pre-initialized to 1.
374 * Pre: offset >= 0 && offset < size()
376 * string must be request local
378 StringData
* modifyChar(int offset
, char c
);
381 * Return a string containing the character at `offset', if it is in
382 * range. Otherwise raises a warning and returns an empty string.
384 * All return values are guaranteed to be static strings.
386 StringData
* getChar(int offset
) const;
389 * Increment this string in the manner of php's ++ operator. May return a new
390 * string if it had to resize. The new string's reference count is
391 * pre-initialized to 1.
393 * Pre: !isStatic() && !isEmpty()
394 * string must be request local
396 StringData
* increment();
399 * Type conversion functions.
401 bool toBoolean() const;
402 char toByte(int base
= 10) const { return toInt64(base
); }
403 short toInt16(int base
= 10) const { return toInt64(base
); }
404 int toInt32(int base
= 10) const { return toInt64(base
); }
405 int64_t toInt64(int base
= 10) const;
406 double toDouble() const;
407 DataType
toNumeric(int64_t& lval
, double& dval
) const;
408 std::string
toCppString() const;
411 * Returns: case insensitive hash value for this string.
413 strhash_t
hash() const;
414 NEVER_INLINE strhash_t
hashHelper() const;
415 static strhash_t
hash(const char* s
, size_t len
);
416 static strhash_t
hash_unsafe(const char* s
, size_t len
);
419 * Equality comparison, in the sense of php's string == operator.
420 * (I.e. numeric strings are compared numerically.)
422 bool equal(const StringData
* s
) const;
425 * Exact comparison, in the sense of php's string === operator.
426 * (Exact, case-sensitive comparison.)
428 bool same(const StringData
* s
) const;
431 * Case-insensitive exact string comparison. (Numeric strings are
432 * not treated specially.)
434 bool isame(const StringData
* s
) const;
437 * Implements comparison in the sense of php's operator < on
438 * strings. (I.e. this compares numeric strings numerically, and
439 * other strings lexicographically.)
441 * Returns: a number less than zero if *this is less than *v2,
442 * greater than zero if *this is greater than *v2, or zero if
445 int compare(const StringData
* v2
) const;
448 * Debug dumping of a StringData to stdout.
452 static StringData
* node2str(StringDataNode
* node
) {
453 return reinterpret_cast<StringData
*>(
454 uintptr_t(node
) - offsetof(Proxy
, node
)
459 static constexpr bool isProxy() { return false; }
461 bool isProxy() const;
464 bool isImmutable() const;
466 bool checkSane() const;
471 const APCString
* apcstr
;
475 template<bool trueStatic
>
476 static StringData
* MakeShared(folly::StringPiece sl
);
478 StringData(const StringData
&) = delete;
479 StringData
& operator=(const StringData
&) = delete;
480 ~StringData() = delete;
483 const void* payload() const;
485 const Proxy
* proxy() const;
489 static constexpr bool isFlat() { return true; }
494 void releaseDataSlowPath();
495 int numericCompare(const StringData
*v2
) const;
496 StringData
* escalate(size_t cap
);
499 void incrementHelper();
502 // We have the next fields blocked into qword-size unions so
503 // StringData initialization can do fewer stores to initialize the
504 // fields. (gcc does not combine the stores itself.)
507 // TODO(5601154): Add KindOfApcString and remove StringData m_data field.
513 mutable int32_t m_hash
; // precomputed for persistent strings
515 uint64_t m_lenAndHash
;
519 //////////////////////////////////////////////////////////////////////
522 * The allocation overhead of a StringData: the struct plus the null byte
524 auto constexpr kStringOverhead
= sizeof(StringData
) + 1;
525 static_assert(StringData::MaxSize
+ kStringOverhead
== kSizeIndex2Size
[103],
526 "max allocation size is a valid size class");
529 * A reasonable length to reserve for small strings. This is also the
530 * default reserve size for StringData::Make().
532 constexpr uint32_t SmallStringReserve
= 64 - kStringOverhead
;
534 alignas(64) constexpr uint32_t kSizeIndex2StringCapacity
[] = {
535 #define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
536 ((uint32_t{1}<<lg_grp) + (uint32_t{ndelta}<<lg_delta)) > kStringOverhead \
537 ? ((uint32_t{1}<<lg_grp) + (uint32_t{ndelta}<<lg_delta)) - kStringOverhead \
544 * Call this if we tried to make a string longer than StringData::MaxSize
546 void raiseStringLengthExceededError(size_t len
);
549 * DecRef a string s, calling release if its reference count goes to
552 void decRefStr(StringData
* s
);
554 //////////////////////////////////////////////////////////////////////
557 * Function objects that forward to the StringData member functions of
560 struct string_data_hash
;
561 struct string_data_same
;
562 struct string_data_isame
;
563 struct string_data_lt
;
564 struct string_data_lti
;
566 //////////////////////////////////////////////////////////////////////
568 extern std::aligned_storage
<
571 >::type s_theEmptyString
;
574 * Return the "static empty string". This is a singleton StaticString
575 * that can be used to return a StaticString for the empty string in
576 * as lightweight a manner as possible.
578 ALWAYS_INLINE StringData
* staticEmptyString() {
579 void* vp
= &s_theEmptyString
;
580 return static_cast<StringData
*>(vp
);
583 //////////////////////////////////////////////////////////////////////
588 template<> class FormatValue
<const HPHP::StringData
*> {
590 explicit FormatValue(const HPHP::StringData
* str
) : m_val(str
) {}
592 template<typename Callback
>
593 void format(FormatArg
& arg
, Callback
& cb
) const {
594 auto piece
= folly::StringPiece(m_val
->data(), m_val
->size());
595 format_value::formatString(piece
, arg
, cb
);
599 const HPHP::StringData
* m_val
;
602 template<> class FormatValue
<HPHP::StringData
*> {
604 explicit FormatValue(const HPHP::StringData
* str
) : m_val(str
) {}
606 template<typename Callback
>
607 void format(FormatArg
& arg
, Callback
& cb
) const {
608 FormatValue
<const HPHP::StringData
*>(m_val
).format(arg
, cb
);
612 const HPHP::StringData
* m_val
;
616 #include "hphp/runtime/base/string-data-inl.h"