Make "String length exceeded" fatal consistent and correct.
[hiphop-php.git] / hphp / runtime / base / string-data.h
blob9c3b88d85c8655956028de2fb2d7d1433de5dc0d
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_STRING_DATA_H_
18 #define incl_HPHP_STRING_DATA_H_
20 #include <folly/Range.h>
22 #include "hphp/util/alloc.h"
23 #include "hphp/util/bstring.h"
24 #include "hphp/util/hash.h"
25 #include "hphp/util/word-mem.h"
27 #include "hphp/runtime/base/countable.h"
28 #include "hphp/runtime/base/datatype.h"
29 #include "hphp/runtime/base/exceptions.h"
30 #include "hphp/runtime/base/memory-manager.h"
31 #include "hphp/runtime/base/string-data-macros.h"
33 namespace HPHP {
35 //////////////////////////////////////////////////////////////////////
37 struct APCString;
38 struct Array;
39 struct String;
41 //////////////////////////////////////////////////////////////////////
43 // Copy the passed-in string and free the buffer immediately.
44 enum AttachStringMode { AttachString };
46 // const char* points to client-owned memory, StringData will copy it
47 // at construct-time using req::malloc. This is only ok when the StringData
48 // itself was request-allocated.
49 enum CopyStringMode { CopyString };
52 * Runtime representation of PHP strings.
54 * StringData's have two different modes, not all of which we want to
55 * keep forever. The main mode is Flat, which means StringData is a
56 * header in a contiguous allocation with the character array for the
57 * string. The other (Proxy) is for APCString-backed StringDatas.
59 * StringDatas can also be allocated in multiple ways. Normally, they
60 * are created through one of the Make overloads, which drops them in
61 * the request-local heap. They can also be low-malloced (for static
62 * strings), or malloc'd (MakeMalloc) for APC shared or uncounted strings.
64 * Here's a breakdown of string modes, and which configurations are
65 * allowed in which allocation mode:
67 * | Static | Malloced | Normal (request local)
68 * +--------+----------+-----------------------
69 * Flat | X | X | X
70 * Proxy | | | X
72 struct StringData final : MaybeCountable,
73 type_scan::MarkCountable<StringData> {
74 friend struct APCString;
75 friend StringData* allocFlat(size_t len);
78 * Max length of a string, not counting the terminal 0.
80 * This is smaller than MAX_INT, and it plus StringData overhead should
81 * exactly equal a size class.
83 #ifdef NO_M_DATA
84 static constexpr uint32_t MaxSize = 0x80000000U - 16 - 1;
85 #else
86 static constexpr uint32_t MaxSize = 0x80000000U - 24 - 1;
87 #endif
90 * Creates an empty request-local string with an unspecified amount of
91 * reserved space. Ref-count is pre-initialized to 1.
93 static StringData* Make();
96 * Constructors that copy the string memory into this StringData, for
97 * request-local strings. Ref-count is pre-initialized to 1.
99 * Most strings are created this way.
101 static StringData* Make(folly::StringPiece);
103 static StringData* Make(const char* data, CopyStringMode);
104 static StringData* Make(const char* data, size_t len, CopyStringMode);
105 static StringData* Make(const StringData* s, CopyStringMode);
106 static StringData* Make(folly::StringPiece r1, CopyStringMode);
109 * Attach constructors for request-local strings.
111 * These do the same thing as the above CopyStringMode constructors, except
112 * that it will also free `data'. Ref-count is pre-initialized to 1.
114 static StringData* Make(char* data, AttachStringMode);
115 static StringData* Make(char* data, size_t len, AttachStringMode);
118 * Create a new request-local string by concatenating two existing
119 * strings. Ref-count is pre-initialized to 1.
121 static StringData* Make(const StringData* s1, const StringData* s2);
122 static StringData* Make(const StringData* s1, folly::StringPiece s2);
123 static StringData* Make(const StringData* s1, const char* lit2);
124 static StringData* Make(folly::StringPiece s1, const char* lit2);
125 static StringData* Make(folly::StringPiece s1, folly::StringPiece s2);
126 static StringData* Make(folly::StringPiece s1, folly::StringPiece s2,
127 folly::StringPiece s3);
128 static StringData* Make(folly::StringPiece s1, folly::StringPiece s2,
129 folly::StringPiece s3, folly::StringPiece s4);
132 * Create a new request-local empty string big enough to hold strings of
133 * length `reserve' (not counting the \0 terminator). Ref-count is
134 * pre-initialized to 1.
136 static StringData* Make(size_t reserve);
139 * Create a request-local "Proxy" StringData that wraps an APCString.
140 * Ref-count is pre-initialized to 1.
142 static StringData* MakeProxy(const APCString* apcstr);
145 * Allocate a string with malloc, using the low-memory allocator if
146 * jemalloc is available, and setting it as a static string.
148 * This api is only for the static-string-table.cpp. The returned
149 * StringData is not expected to be reference counted, and must be
150 * deallocated using destructStatic.
152 static StringData* MakeStatic(folly::StringPiece);
155 * Same as MakeStatic but the string allocated will *not* be in the static
156 * string table, will not be in low-memory, and should be deleted using
157 * destructUncounted once the root goes out of scope.
159 static StringData* MakeUncounted(folly::StringPiece);
162 * Same as MakeStatic but initializes the empty string in aligned storage.
163 * This should be called by the static string table initialization code.
165 static StringData* MakeEmpty();
168 * Offset accessors for the JIT compiler.
170 #ifndef NO_M_DATA
171 static constexpr ptrdiff_t dataOff() { return offsetof(StringData, m_data); }
172 #endif
173 static constexpr ptrdiff_t sizeOff() { return offsetof(StringData, m_len); }
174 static constexpr ptrdiff_t hashOff() { return offsetof(StringData, m_hash); }
177 * Proxy StringData's have a sweep list running through them for
178 * decrefing the APCString they are fronting. This function
179 * must be called at request cleanup time to handle this.
181 static unsigned sweepAll();
184 * Called to return a StringData to the request allocator. This is
185 * normally called when the reference count goes to zero (e.g. with
186 * a helper like decRefStr).
188 void release() noexcept;
189 size_t heapSize() const;
192 * StringData objects allocated with MakeStatic should be freed
193 * using this function.
195 void destructStatic();
198 * StringData objects allocated with MakeUncounted should be freed
199 * using this function.
201 void destructUncounted();
204 * Reference-counting related.
206 ALWAYS_INLINE void decRefAndRelease() {
207 assert(kindIsValid());
208 if (decReleaseCheck()) release();
211 bool kindIsValid() const { return m_kind == HeaderKind::String; }
214 * Append the supplied range to this string. If there is not sufficient
215 * capacity in this string to contain the range, a new string may be
216 * returned. The new string's reference count will be pre-initialized to 1.
218 * Pre: !hasMultipleRefs()
219 * Pre: the string is request-local
221 StringData* append(folly::StringPiece r);
222 StringData* append(folly::StringPiece r1, folly::StringPiece r2);
223 StringData* append(folly::StringPiece r1,
224 folly::StringPiece r2,
225 folly::StringPiece r3);
228 * Reserve space for a string of length `maxLen' (not counting null
229 * terminator).
231 * May not be called for strings created with MakeUncounted or
232 * MakeStatic.
234 * Returns: possibly a new StringData, if we had to reallocate. The new
235 * string's reference count will be pre-initialized to 1.
237 StringData* reserve(size_t maxLen);
240 * Shrink a string down to length `len` (not counting null terminator).
242 * May not be called for strings created with MakeUncounted or
243 * MakeStatic.
245 * Returns: possibly a new StringData, if we decided to reallocate. The new
246 * string's reference count is pre-initialized to 1. shrinkImpl
247 * always returns a new StringData.
249 StringData* shrink(size_t len);
250 StringData* shrinkImpl(size_t len);
253 * Returns a slice with extents sized to the *string* that this
254 * StringData wraps. This range does not include a null terminator.
256 * Note: please do not add new code that assumes the range does
257 * include a null-terminator if possible. (We would like to make
258 * this unnecessary eventually.)
260 folly::StringPiece slice() const;
263 * Returns a mutable slice with extents sized to the *buffer* this
264 * StringData wraps, not the string, minus space for an implicit
265 * null terminator.
267 * Note: please do not introduce new uses of this API that write
268 * nulls 1 byte past slice.len---we want to weed those out.
270 folly::MutableStringPiece bufferSlice();
273 * If external users of this object want to modify it (e.g. through
274 * bufferSlice or mutableData()), they are responsible for either
275 * calling setSize() if the mutation changed the size of the string,
276 * or invalidateHash() if not.
278 * Pre: !hasMultipleRefs()
280 void invalidateHash();
281 void setSize(int len);
284 * StringData should not generally be allocated on the stack,
285 * because references to it could escape. This function is for
286 * debugging: it asserts that the address of this doesn't point into
287 * the C++ stack.
289 void checkStack() const;
292 * Access to the string's data as a character array.
294 * Please try to prefer slice() in new code, instead of assuming
295 * this is null terminated.
297 const char* data() const;
300 * Mutable version of data().
302 char* mutableData() const;
305 * Accessor for the length of a string.
307 * Note: size() returns a signed int for historical reasons. It is
308 * guaranteed to be in the range (0 <= size() <= MaxSize)
310 int size() const;
313 * Returns: size() == 0
315 bool empty() const;
318 * Return the capacity of this string's buffer, not including the space
319 * for the null terminator. Always 0 for static/uncounted strings.
321 uint32_t capacity() const;
324 * Simultaneously query whether this string is numeric, and pull out
325 * the numeric value of the string (as either an int or a double).
327 * The allow_errors flag is a boolean that does something currently
328 * undocumented.
330 * If overflow is set its value is initialized to either zero to
331 * indicate that no overflow occurred or 1/-1 to indicate the direction
332 * of overflow.
334 * Returns: KindOfNull, KindOfInt64 or KindOfDouble. The int64_t or
335 * double out reference params are populated in the latter two cases
336 * with the numeric value of the string. The KindOfNull case
337 * indicates the string is not numeric.
339 DataType isNumericWithVal(int64_t&, double&, int allowErrors,
340 int* overflow = nullptr) const;
343 * Returns true if this string is numeric.
345 * In effect: isNumericWithVal(i, d, false) != KindOfNull
347 bool isNumeric() const;
350 * Returns whether this string is numeric and an integer.
352 * In effect: isNumericWithVal(i, d, false) == KindOfInt64
354 bool isInteger() const;
357 * Returns true if this string is "strictly" an integer in the sense
358 * of is_strictly_integer from util/hash.h, and if so provides the
359 * integer value in res.
361 bool isStrictlyInteger(int64_t& res) const;
364 * Returns whether this string contains a single character '0'.
366 bool isZero() const;
369 * Change the character at offset `offset' to `c'.
371 * May return a reallocated StringData* if this string was a shared
372 * string. The new string's reference count is pre-initialized to 1.
374 * Pre: offset >= 0 && offset < size()
375 * !hasMultipleRefs()
376 * string must be request local
378 StringData* modifyChar(int offset, char c);
381 * Return a string containing the character at `offset', if it is in
382 * range. Otherwise raises a warning and returns an empty string.
384 * All return values are guaranteed to be static strings.
386 StringData* getChar(int offset) const;
389 * Increment this string in the manner of php's ++ operator. May return a new
390 * string if it had to resize. The new string's reference count is
391 * pre-initialized to 1.
393 * Pre: !isStatic() && !isEmpty()
394 * string must be request local
396 StringData* increment();
399 * Type conversion functions.
401 bool toBoolean() const;
402 char toByte(int base = 10) const { return toInt64(base); }
403 short toInt16(int base = 10) const { return toInt64(base); }
404 int toInt32(int base = 10) const { return toInt64(base); }
405 int64_t toInt64(int base = 10) const;
406 double toDouble() const;
407 DataType toNumeric(int64_t& lval, double& dval) const;
408 std::string toCppString() const;
411 * Returns: case insensitive hash value for this string.
413 strhash_t hash() const;
414 NEVER_INLINE strhash_t hashHelper() const;
415 static strhash_t hash(const char* s, size_t len);
416 static strhash_t hash_unsafe(const char* s, size_t len);
419 * Equality comparison, in the sense of php's string == operator.
420 * (I.e. numeric strings are compared numerically.)
422 bool equal(const StringData* s) const;
425 * Exact comparison, in the sense of php's string === operator.
426 * (Exact, case-sensitive comparison.)
428 bool same(const StringData* s) const;
431 * Case-insensitive exact string comparison. (Numeric strings are
432 * not treated specially.)
434 bool isame(const StringData* s) const;
437 * Implements comparison in the sense of php's operator < on
438 * strings. (I.e. this compares numeric strings numerically, and
439 * other strings lexicographically.)
441 * Returns: a number less than zero if *this is less than *v2,
442 * greater than zero if *this is greater than *v2, or zero if
443 * this->equal(v2).
445 int compare(const StringData* v2) const;
448 * Debug dumping of a StringData to stdout.
450 void dump() const;
452 static StringData* node2str(StringDataNode* node) {
453 return reinterpret_cast<StringData*>(
454 uintptr_t(node) - offsetof(Proxy, node)
455 - sizeof(StringData)
458 #ifdef NO_M_DATA
459 static constexpr bool isProxy() { return false; }
460 #else
461 bool isProxy() const;
462 #endif
464 bool isImmutable() const;
466 bool checkSane() const;
468 private:
469 struct Proxy {
470 StringDataNode node;
471 const APCString* apcstr;
474 private:
475 template<bool trueStatic>
476 static StringData* MakeShared(folly::StringPiece sl);
478 StringData(const StringData&) = delete;
479 StringData& operator=(const StringData&) = delete;
480 ~StringData() = delete;
482 private:
483 const void* payload() const;
484 void* payload();
485 const Proxy* proxy() const;
486 Proxy* proxy();
488 #ifdef NO_M_DATA
489 static constexpr bool isFlat() { return true; }
490 #else
491 bool isFlat() const;
492 #endif
494 void releaseDataSlowPath();
495 int numericCompare(const StringData *v2) const;
496 StringData* escalate(size_t cap);
497 void enlist();
498 void delist();
499 void incrementHelper();
500 void preCompute();
502 // We have the next fields blocked into qword-size unions so
503 // StringData initialization can do fewer stores to initialize the
504 // fields. (gcc does not combine the stores itself.)
505 private:
506 #ifndef NO_M_DATA
507 // TODO(5601154): Add KindOfApcString and remove StringData m_data field.
508 char* m_data;
509 #endif
510 union {
511 struct {
512 uint32_t m_len;
513 mutable int32_t m_hash; // precomputed for persistent strings
515 uint64_t m_lenAndHash;
519 //////////////////////////////////////////////////////////////////////
522 * The allocation overhead of a StringData: the struct plus the null byte
524 auto constexpr kStringOverhead = sizeof(StringData) + 1;
525 static_assert(StringData::MaxSize + kStringOverhead == kSizeIndex2Size[103],
526 "max allocation size is a valid size class");
529 * A reasonable length to reserve for small strings. This is also the
530 * default reserve size for StringData::Make().
532 constexpr uint32_t SmallStringReserve = 64 - kStringOverhead;
534 alignas(64) constexpr uint32_t kSizeIndex2StringCapacity[] = {
535 #define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
536 ((uint32_t{1}<<lg_grp) + (uint32_t{ndelta}<<lg_delta)) > kStringOverhead \
537 ? ((uint32_t{1}<<lg_grp) + (uint32_t{ndelta}<<lg_delta)) - kStringOverhead \
538 : 0,
539 SIZE_CLASSES
540 #undef SIZE_CLASS
544 * Call this if we tried to make a string longer than StringData::MaxSize
546 void raiseStringLengthExceededError(size_t len);
549 * DecRef a string s, calling release if its reference count goes to
550 * zero.
552 void decRefStr(StringData* s);
554 //////////////////////////////////////////////////////////////////////
557 * Function objects that forward to the StringData member functions of
558 * the same name.
560 struct string_data_hash;
561 struct string_data_same;
562 struct string_data_isame;
563 struct string_data_lt;
564 struct string_data_lti;
566 //////////////////////////////////////////////////////////////////////
568 extern std::aligned_storage<
569 kStringOverhead,
570 alignof(StringData)
571 >::type s_theEmptyString;
574 * Return the "static empty string". This is a singleton StaticString
575 * that can be used to return a StaticString for the empty string in
576 * as lightweight a manner as possible.
578 ALWAYS_INLINE StringData* staticEmptyString() {
579 void* vp = &s_theEmptyString;
580 return static_cast<StringData*>(vp);
583 //////////////////////////////////////////////////////////////////////
587 namespace folly {
588 template<> class FormatValue<const HPHP::StringData*> {
589 public:
590 explicit FormatValue(const HPHP::StringData* str) : m_val(str) {}
592 template<typename Callback>
593 void format(FormatArg& arg, Callback& cb) const {
594 auto piece = folly::StringPiece(m_val->data(), m_val->size());
595 format_value::formatString(piece, arg, cb);
598 private:
599 const HPHP::StringData* m_val;
602 template<> class FormatValue<HPHP::StringData*> {
603 public:
604 explicit FormatValue(const HPHP::StringData* str) : m_val(str) {}
606 template<typename Callback>
607 void format(FormatArg& arg, Callback& cb) const {
608 FormatValue<const HPHP::StringData*>(m_val).format(arg, cb);
611 private:
612 const HPHP::StringData* m_val;
616 #include "hphp/runtime/base/string-data-inl.h"
618 #endif