Equality
[hiphop-php.git] / hphp / runtime / base / string-data.cpp
blob2e4344b996ec7c4f12917fc16dacad780e0c4195
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/base/string-data.h"
19 #include <cmath>
20 #include <utility>
22 #include "hphp/util/alloc.h"
23 #include "hphp/util/safe-cast.h"
24 #include "hphp/util/stacktrace-profiler.h"
26 #include "hphp/runtime/base/apc-handle-defs.h"
27 #include "hphp/runtime/base/apc-string.h"
28 #include "hphp/runtime/base/builtin-functions.h"
29 #include "hphp/runtime/base/exceptions.h"
30 #include "hphp/runtime/base/runtime-error.h"
31 #include "hphp/runtime/base/runtime-option.h"
32 #include "hphp/runtime/base/tv-uncounted.h"
33 #include "hphp/runtime/base/zend-functions.h"
34 #include "hphp/runtime/base/zend-string.h"
35 #include "hphp/runtime/ext/apc/ext_apc.h"
37 #include "hphp/zend/zend-strtod.h"
39 namespace HPHP {
41 //////////////////////////////////////////////////////////////////////
43 NEVER_INLINE void raiseStringLengthExceededError(size_t len) {
44 raise_error("String length exceeded: %zu > %u", len, StringData::MaxSize);
47 // Allocate, initialize `m_data' and HeapObject, but not `m_lenAndHash'.
48 ALWAYS_INLINE StringData* allocFlat(size_t len) {
49 if (UNLIKELY(len > StringData::MaxSize)) {
50 raiseStringLengthExceededError(len);
52 auto const sizeIndex = MemoryManager::size2Index(len + kStringOverhead);
53 auto sd = static_cast<StringData*>(tl_heap->objMallocIndex(sizeIndex));
54 // Refcount initialized to 1.
55 sd->initHeader_16(HeaderKind::String, OneReference, sizeIndex);
56 assertx(sd->capacity() >= len);
57 #ifndef NO_M_DATA
58 sd->m_data = reinterpret_cast<char*>(sd + 1);
59 #endif
60 return sd;
63 //////////////////////////////////////////////////////////////////////
65 std::aligned_storage<
66 kStringOverhead,
67 alignof(StringData)
68 >::type s_theEmptyString;
70 //////////////////////////////////////////////////////////////////////
72 namespace {
73 std::atomic<bool> s_symbols_loaded;
75 SymbolPrefix* getSymbolPrefix(StringData* sd) {
76 assertx(sd->isSymbol());
77 return reinterpret_cast<SymbolPrefix*>(sd) - 1;
79 const SymbolPrefix* getSymbolPrefix(const StringData* sd) {
80 assertx(sd->isSymbol());
81 return getSymbolPrefix(const_cast<StringData*>(sd));
85 bool StringData::isSymbol() const {
86 return (m_aux16 >> 8) & kIsSymbolMask;
89 void StringData::markSymbolsLoaded() {
90 s_symbols_loaded.store(true, std::memory_order_release);
93 Class* StringData::getCachedClass() const {
94 return getSymbolPrefix(this)->cls;
97 NamedEntity* StringData::getNamedEntity() const {
98 return getSymbolPrefix(this)->ne;
101 void StringData::setCachedClass(Class* cls) {
102 auto const prefix = getSymbolPrefix(this);
103 assertx(IMPLIES(prefix->cls, prefix->cls == cls));
104 prefix->cls = cls;
107 void StringData::setNamedEntity(NamedEntity* ne) {
108 auto const prefix = getSymbolPrefix(this);
109 assertx(IMPLIES(prefix->ne, prefix->ne == ne));
110 prefix->ne = ne;
113 ptrdiff_t StringData::isSymbolOffset() {
114 return offsetof(StringData, m_aux16) + 1;
117 ptrdiff_t StringData::cachedClassOffset() {
118 return offsetof(SymbolPrefix, cls) - sizeof(SymbolPrefix);
121 //////////////////////////////////////////////////////////////////////
123 ptrdiff_t StringData::colorOffset() {
124 return offsetof(StringData, m_aux16);
127 uint16_t StringData::color() const {
128 return m_aux16 & kColorMask;
131 void StringData::setColor(uint16_t color) {
132 assertx((color & ~kColorMask) == 0);
133 m_aux16 |= color;
136 //////////////////////////////////////////////////////////////////////
138 // Create either a static or an uncounted string.
139 // Difference between static and uncounted is in the lifetime
140 // of the string. Static are alive for the lifetime of the process.
141 // Uncounted are not ref counted but will be deleted at some point.
142 template <bool trueStatic> ALWAYS_INLINE
143 MemBlock StringData::AllocateShared(folly::StringPiece sl) {
144 if (UNLIKELY(sl.size() > StringData::MaxSize)) {
145 raiseStringLengthExceededError(sl.size());
148 auto const symbol =
149 trueStatic && !s_symbols_loaded.load(std::memory_order_acquire);
151 auto const extra = symbol ? sizeof(SymbolPrefix) : 0;
152 auto const bytes = sl.size() + kStringOverhead + extra;
153 auto const ptr = trueStatic ? static_alloc(bytes) : AllocUncounted(bytes);
154 return MemBlock{ptr, bytes};
157 template <bool trueStatic> ALWAYS_INLINE
158 StringData* StringData::MakeSharedAt(folly::StringPiece sl, MemBlock range) {
159 assertx(range.size >= sl.size() + kStringOverhead);
160 auto const symbol = trueStatic &&
161 !s_symbols_loaded.load(std::memory_order_acquire) &&
162 (range.size >= sl.size() + kStringOverhead + sizeof(SymbolPrefix));
163 auto const extra = symbol ? sizeof(SymbolPrefix) : 0;
164 StringData* sd = reinterpret_cast<StringData*>(
165 reinterpret_cast<uintptr_t>(range.ptr) + extra
167 auto const data = reinterpret_cast<char*>(sd + 1);
169 #ifndef NO_M_DATA
170 sd->m_data = data;
171 #endif
172 auto const count = trueStatic ? StaticValue : UncountedValue;
173 if (symbol) {
174 auto constexpr aux = kIsSymbolMask << 8 | kInvalidColor;
175 sd->initHeader_16(HeaderKind::String, count, aux);
176 getSymbolPrefix(sd)->cls = nullptr;
177 getSymbolPrefix(sd)->ne = nullptr;
178 } else {
179 sd->initHeader_16(HeaderKind::String, count, kInvalidColor);
181 sd->m_len = sl.size(); // m_hash is computed soon.
183 data[sl.size()] = 0;
184 auto const mcret = memcpy(data, sl.data(), sl.size());
185 auto const ret = reinterpret_cast<StringData*>(mcret) - 1;
186 // Recalculating ret from mcret avoids a spill.
187 ret->preCompute(); // get m_hash right
189 assertx(ret == sd);
190 assertx(ret->isFlat());
191 assertx(trueStatic ? ret->isStatic() : ret->isUncounted());
192 assertx(ret->isSymbol() == symbol);
193 assertx(ret->checkSane());
194 return ret;
197 StringData* StringData::MakeStaticAt(folly::StringPiece sl, MemBlock range) {
198 return MakeSharedAt<true>(sl, range);
201 StringData* StringData::MakeStatic(folly::StringPiece sl) {
202 assertx(StaticString::s_globalInit);
203 return MakeStaticAt(sl, AllocateShared<true>(sl));
206 StringData* StringData::MakeUncounted(folly::StringPiece sl) {
207 return MakeSharedAt<false>(sl, AllocateShared<false>(sl));
210 StringData* StringData::MakeEmpty() {
211 return MakeStaticAt(folly::StringPiece{""},
212 MemBlock{&s_theEmptyString, sizeof(s_theEmptyString)});
215 void StringData::destructStatic() {
216 assertx(checkSane() && isStatic());
217 assertx(isFlat());
218 if (isSymbol()) {
219 static_try_free(reinterpret_cast<SymbolPrefix*>(this) - 1,
220 size() + kStringOverhead + sizeof(SymbolPrefix));
221 } else {
222 static_try_free(this, size() + kStringOverhead);
226 void StringData::ReleaseUncounted(StringData* str) {
227 assertx(str->isFlat());
228 assertx(str->checkSane());
229 assertx(!str->uncountedCowCheck());
230 FreeUncounted(str, str->size() + kStringOverhead);
233 //////////////////////////////////////////////////////////////////////
235 ALWAYS_INLINE void StringData::delist() {
236 assertx(isProxy());
237 auto& payload = *proxy();
238 auto const next = payload.node.next;
239 auto const prev = payload.node.prev;
240 assertx(uintptr_t(next) != kMallocFreeWord);
241 assertx(uintptr_t(prev) != kMallocFreeWord);
242 next->prev = prev;
243 prev->next = next;
246 unsigned StringData::sweepAll() {
247 auto& head = tl_heap->getStringList();
248 auto count = 0;
249 for (StringDataNode *next, *n = head.next; n != &head; n = next) {
250 count++;
251 next = n->next;
252 assertx(next && uintptr_t(next) != kSmallFreeWord);
253 assertx(next && uintptr_t(next) != kMallocFreeWord);
254 auto const s = node2str(n);
255 assertx(s->isProxy());
256 s->proxy()->apcstr->unreference();
258 head.next = head.prev = &head;
259 return count;
262 //////////////////////////////////////////////////////////////////////
264 StringData* StringData::Make(const StringData* s, CopyStringMode) {
265 auto const sd = allocFlat(s->m_len);
266 sd->m_lenAndHash = s->m_lenAndHash;
267 auto const data = static_cast<void*>(sd + 1);
268 *memcpy8(data, s->data(), s->m_len) = 0;
270 assertx(sd->same(s));
271 return sd;
274 StringData* StringData::Make(folly::StringPiece sl, CopyStringMode) {
275 auto const sd = allocFlat(sl.size());
276 sd->m_lenAndHash = sl.size(); // hash=0
277 auto const data = reinterpret_cast<char*>(sd + 1);
279 data[sl.size()] = 0;
280 auto const mcret = memcpy(data, sl.data(), sl.size());
281 auto const ret = reinterpret_cast<StringData*>(mcret) - 1;
282 // Recalculating ret from mcret avoids a spill.
284 assertx(ret == sd);
285 assertx(ret->m_len == sl.size());
286 assertx(ret->hasExactlyOneRef());
287 assertx(ret->m_hash == 0);
288 assertx(ret->isFlat());
289 assertx(ret->checkSane());
290 return ret;
293 StringData* StringData::Make(const char* data, size_t len, CopyStringMode) {
294 if (UNLIKELY(len > StringData::MaxSize)) {
295 raiseStringLengthExceededError(len);
298 return Make(folly::StringPiece(data, len), CopyString);
301 StringData* StringData::Make(size_t reserveLen) {
302 auto const sd = allocFlat(reserveLen);
303 sd->setSize(0);
305 assertx(sd->hasExactlyOneRef());
306 assertx(sd->isFlat());
307 assertx(sd->checkSane());
308 return sd;
311 StringData* StringData::Make() {
312 return Make(SmallStringReserve);
315 //////////////////////////////////////////////////////////////////////
317 StringData* StringData::Make(char* data, size_t len, AttachStringMode) {
318 if (UNLIKELY(len > StringData::MaxSize)) {
319 raiseStringLengthExceededError(len);
321 auto const sd = Make(folly::StringPiece(data, len), CopyString);
322 free(data);
323 assertx(sd->checkSane());
324 return sd;
327 StringData* StringData::Make(folly::StringPiece r1, folly::StringPiece r2) {
328 // Undefined behavior if we pass nullptr strings into StringData::Make
329 assertx(r1.data() && r2.data());
330 auto const len = r1.size() + r2.size();
331 auto const sd = allocFlat(len);
332 sd->m_lenAndHash = len; // hash=0
334 auto const data = reinterpret_cast<char*>(sd + 1);
335 memcpy(data, r1.data(), r1.size());
336 memcpy(data + r1.size(), r2.data(), r2.size());
337 data[len] = 0;
339 assertx(sd->hasExactlyOneRef());
340 assertx(sd->isFlat());
341 assertx(sd->checkSane());
342 return sd;
345 StringData* StringData::Make(const StringData* s1, const StringData* s2) {
346 auto const len = s1->m_len + s2->m_len;
347 // `memcpy8()' could overrun the buffer by at most 7 bytes, so we allocate 6
348 // more bytes here, which (together with the trailing 0) makes it safe.
349 auto const sd = allocFlat(len + 6);
350 sd->m_lenAndHash = len; // hash=0
352 auto const data = reinterpret_cast<char*>(sd + 1);
353 auto const next = memcpy8(data, s1->data(), s1->m_len);
354 *memcpy8(next, s2->data(), s2->m_len) = 0;
356 assertx(sd->hasExactlyOneRef());
357 assertx(sd->isFlat());
358 assertx(sd->checkSane());
359 return sd;
362 StringData* StringData::Make(folly::StringPiece s1, const char* lit2) {
363 return Make(s1, folly::StringPiece(lit2, strlen(lit2)));
366 StringData* StringData::Make(folly::StringPiece r1, folly::StringPiece r2,
367 folly::StringPiece r3) {
368 auto const len = r1.size() + r2.size() + r3.size();
369 auto const sd = allocFlat(len);
370 sd->m_lenAndHash = len; // hash=0
372 auto p = reinterpret_cast<char*>(sd + 1);
373 p = static_cast<char*>(memcpy(p, r1.data(), r1.size()));
374 p = static_cast<char*>(memcpy(p + r1.size(), r2.data(), r2.size()));
375 p = static_cast<char*>(memcpy(p + r2.size(), r3.data(), r3.size()));
376 p[r3.size()] = 0;
378 assertx(sd->hasExactlyOneRef());
379 assertx(sd->isFlat());
380 assertx(sd->checkSane());
381 return sd;
384 StringData* StringData::Make(folly::StringPiece r1, folly::StringPiece r2,
385 folly::StringPiece r3, folly::StringPiece r4) {
386 auto const len = r1.size() + r2.size() + r3.size() + r4.size();
387 auto const sd = allocFlat(len);
388 sd->m_lenAndHash = len; // hash=0
390 auto p = reinterpret_cast<char*>(sd + 1);
391 p = static_cast<char*>(memcpy(p, r1.data(), r1.size()));
392 p = static_cast<char*>(memcpy(p + r1.size(), r2.data(), r2.size()));
393 p = static_cast<char*>(memcpy(p + r2.size(), r3.data(), r3.size()));
394 p = static_cast<char*>(memcpy(p + r3.size(), r4.data(), r4.size()));
395 p[r4.size()] = 0;
397 assertx(sd->hasExactlyOneRef());
398 assertx(sd->isFlat());
399 assertx(sd->checkSane());
400 return sd;
403 //////////////////////////////////////////////////////////////////////
405 ALWAYS_INLINE void StringData::enlist() {
406 assertx(isProxy());
407 auto& head = tl_heap->getStringList();
408 // insert after head
409 auto const next = head.next;
410 auto& payload = *proxy();
411 assertx(uintptr_t(next) != kMallocFreeWord);
412 payload.node.next = next;
413 payload.node.prev = &head;
414 next->prev = head.next = &payload.node;
417 StringData* StringData::MakeProxy(const APCString* apcstr) {
418 #ifdef NO_M_DATA
419 always_assert(false);
420 not_reached();
421 #else
422 assertx(!apcExtension::UseUncounted);
423 // No need to check if len > MaxSize, because if it were we'd never
424 // have made the StringData in the APCVariant without throwing.
425 assertx(size_t(apcstr->getStringData()->size()) <= size_t(MaxSize));
427 auto const sd = static_cast<StringData*>(
428 tl_heap->mallocSmallSize(sizeof(StringData) + sizeof(Proxy))
430 auto const data = apcstr->getStringData();
431 sd->m_data = const_cast<char*>(data->m_data);
432 sd->initHeader(*data, OneReference);
433 sd->m_lenAndHash = data->m_lenAndHash;
434 sd->proxy()->apcstr = apcstr;
435 sd->enlist();
436 apcstr->reference();
438 assertx(sd->m_len == data->size());
439 assertx(sd->m_aux16 == data->m_aux16);
440 assertx(sd->m_kind == HeaderKind::String);
441 assertx(sd->hasExactlyOneRef());
442 assertx(sd->m_hash == data->m_hash);
443 assertx(sd->isProxy());
444 assertx(sd->checkSane());
445 return sd;
446 #endif
449 void StringData::unProxy() {
450 assertx(isProxy());
451 proxy()->apcstr->unreference();
452 delist();
455 NEVER_INLINE
456 void StringData::releaseProxy() {
457 unProxy();
458 tl_heap->freeSmallSize(this, sizeof(StringData) + sizeof(Proxy));
461 void StringData::release() noexcept {
462 fixCountForRelease();
463 assertx(isRefCounted());
464 assertx(checkSane());
465 if (UNLIKELY(!isFlat())) {
466 releaseProxy();
467 AARCH64_WALKABLE_FRAME();
468 return;
470 tl_heap->objFreeIndex(this, m_aux16);
471 AARCH64_WALKABLE_FRAME();
474 //////////////////////////////////////////////////////////////////////
/*
 * Assertions used by the append() overloads about a source range that may
 * alias our own buffer:
 *   - `ptr' is either at/before data(), or at/after data() + capacity() + 1
 *     (so it is never an interior pointer into our buffer);
 *   - when `ptr' is exactly data(), `len' does not exceed our current length.
 *
 * Arguments are parenthesized and the body is wrapped in do { } while (0) so
 * the macro expands to exactly one statement regardless of the expressions
 * passed or the surrounding control flow.
 */
#define ALIASING_APPEND_ASSERT(ptr, len)                                  \
  do {                                                                    \
    assertx(uintptr_t(ptr) <= uintptr_t(data()) ||                        \
            uintptr_t(ptr) >= uintptr_t(data() + capacity() + 1));        \
    assertx((ptr) != data() || (len) <= m_len);                           \
  } while (0)
481 StringData* StringData::append(folly::StringPiece range) {
482 assertx(!isImmutable() && !hasMultipleRefs());
484 auto s = range.data();
485 auto const len = range.size();
486 if (len == 0) return this;
487 auto const newLen = size_t(m_len) + size_t(len);
489 if (UNLIKELY(newLen > MaxSize)) {
490 raiseStringLengthExceededError(newLen);
494 * We may have an aliasing append. We don't allow appending with an
495 * interior pointer, although we may be asked to append less than
496 * the whole string in an aliasing situation.
498 ALIASING_APPEND_ASSERT(s, len);
500 auto const requestLen = static_cast<uint32_t>(newLen);
501 auto const target = UNLIKELY(isProxy()) ? escalate(requestLen)
502 : reserve(requestLen);
503 memcpy(target->mutableData() + m_len, s, len);
504 target->setSize(newLen);
505 assertx(target->checkSane());
507 return target;
510 StringData* StringData::append(folly::StringPiece r1, folly::StringPiece r2) {
511 assertx(!isImmutable() && !hasMultipleRefs());
513 auto const len = r1.size() + r2.size();
515 if (len == 0) return this;
516 if (UNLIKELY(size_t(m_len) + size_t(len) > MaxSize)) {
517 raiseStringLengthExceededError(size_t(len) + size_t(m_len));
520 auto const newLen = m_len + len;
523 * We may have an aliasing append. We don't allow appending with an
524 * interior pointer, although we may be asked to append less than
525 * the whole string in an aliasing situation.
527 ALIASING_APPEND_ASSERT(r1.data(), r1.size());
528 ALIASING_APPEND_ASSERT(r2.data(), r2.size());
530 auto const target = UNLIKELY(isProxy()) ? escalate(newLen)
531 : reserve(newLen);
534 * memcpy is safe even if it's a self append---the regions will be
535 * disjoint, since rN.data() can't point past the start of our source
536 * pointer, and rN.size() is smaller than the old length.
538 void* p = target->mutableData();
539 p = memcpy((char*)p + m_len, r1.data(), r1.size());
540 memcpy((char*)p + r1.size(), r2.data(), r2.size());
542 target->setSize(newLen);
543 assertx(target->checkSane());
545 return target;
548 StringData* StringData::append(folly::StringPiece r1,
549 folly::StringPiece r2,
550 folly::StringPiece r3) {
551 assertx(!isImmutable() && !hasMultipleRefs());
553 auto const len = r1.size() + r2.size() + r3.size();
555 if (len == 0) return this;
556 if (UNLIKELY(size_t(m_len) + size_t(len) > MaxSize)) {
557 raiseStringLengthExceededError(size_t(len) + size_t(m_len));
560 auto const newLen = m_len + len;
563 * We may have an aliasing append. We don't allow appending with an
564 * interior pointer, although we may be asked to append less than
565 * the whole string in an aliasing situation.
567 ALIASING_APPEND_ASSERT(r1.data(), r1.size());
568 ALIASING_APPEND_ASSERT(r2.data(), r2.size());
569 ALIASING_APPEND_ASSERT(r3.data(), r3.size());
571 auto const target = UNLIKELY(isProxy()) ? escalate(newLen)
572 : reserve(newLen);
575 * memcpy is safe even if it's a self append---the regions will be
576 * disjoint, since rN.data() can't point past the start of our source
577 * pointer, and rN.size() is smaller than the old length.
579 void* p = target->mutableData();
580 p = memcpy((char*)p + m_len, r1.data(), r1.size());
581 p = memcpy((char*)p + r1.size(), r2.data(), r2.size());
582 memcpy((char*)p + r2.size(), r3.data(), r3.size());
584 target->setSize(newLen);
585 assertx(target->checkSane());
587 return target;
590 #undef ALIASING_APPEND_ASSERT
592 //////////////////////////////////////////////////////////////////////
594 StringData* StringData::reserve(size_t cap) {
595 assertx(!isImmutable() && !hasMultipleRefs());
596 assertx(isFlat());
598 if (cap <= capacity()) return this;
600 cap = std::min(cap + cap / 4, size_t(MaxSize));
601 auto const sd = allocFlat(cap);
603 // Request-allocated StringData are always aligned at 16 bytes, thus it is
604 // safe to copy in 16-byte groups.
605 #ifdef NO_M_DATA
606 // layout: [header][m_lenAndHash][...data]
607 sd->m_lenAndHash = m_lenAndHash;
608 // This copies the characters (m_len bytes), and the trailing zero (1 byte)
609 memcpy16_inline(sd+1, this+1, (m_len + 1 + 15) & ~0xF);
610 assertx(reinterpret_cast<uintptr_t>(this+1) % 16 == 0);
611 #else
612 // layout: [header][m_data][m_lenAndHash][...data]
613 // This copies m_lenAndHash (8 bytes), the characters (m_len bytes),
614 // and the trailing zero (1 byte).
615 memcpy16_inline(&sd->m_lenAndHash, &m_lenAndHash,
616 (m_len + 8 + 1 + 15) & ~0xF);
617 assertx(reinterpret_cast<uintptr_t>(&m_lenAndHash) + 8 ==
618 reinterpret_cast<uintptr_t>(m_data));
619 assertx(reinterpret_cast<uintptr_t>(&m_lenAndHash) % 16 == 0);
620 #endif
622 assertx(sd->hasExactlyOneRef());
623 assertx(sd->isFlat());
624 assertx(sd->checkSane());
625 return sd;
628 StringData* StringData::shrinkImpl(size_t len) {
629 assertx(!isImmutable() && !hasMultipleRefs());
630 assertx(isFlat());
631 assertx(len <= capacity());
633 auto const sd = allocFlat(len);
634 sd->m_lenAndHash = len;
635 auto const src = static_cast<void*>(this + 1);
636 auto const dst = static_cast<void*>(sd + 1);
637 *memcpy8(dst, src, len) = 0;
639 assertx(sd->checkSane());
640 return sd;
643 StringData* StringData::shrink(size_t len) {
644 assertx(!isImmutable() && !hasMultipleRefs());
645 if (capacity() - len > kMinShrinkThreshold) {
646 return shrinkImpl(len);
648 assertx(len < MaxSize);
649 setSize(len);
650 return this;
653 // State transition from Mode::Shared to Mode::Flat.
654 StringData* StringData::escalate(size_t cap) {
655 assertx(isProxy() && !isStatic() && cap >= m_len);
657 auto const sd = allocFlat(cap);
658 sd->m_lenAndHash = m_lenAndHash;
659 auto const sd_data = reinterpret_cast<char*>(sd + 1);
660 *memcpy8(sd_data, data(), m_len) = 0;
662 assertx(sd->hasExactlyOneRef());
663 assertx(sd->isFlat());
664 assertx(sd->checkSane());
665 return sd;
668 void StringData::dump() const {
669 auto s = slice();
671 printf("StringData(%d) (%s%s%s%d): [", m_count,
672 isProxy() ? "proxy " : "",
673 isStatic() ? "static " : "",
674 isUncounted() ? "uncounted " : "",
675 static_cast<int>(s.size()));
676 for (uint32_t i = 0; i < s.size(); i++) {
677 char ch = s.data()[i];
678 if (isprint(ch)) {
679 printf("%c", ch);
680 } else {
681 printf("\\x%02x", ch);
684 printf("]\n");
687 StringData* StringData::getChar(int offset) const {
688 if (offset >= 0 && offset < size()) {
689 return makeStaticString(data()[offset]);
691 raise_notice("Uninitialized string offset: %d", offset);
692 return staticEmptyString();
695 StringData* StringData::increment() {
696 assertx(!isImmutable() && !hasMultipleRefs());
697 assertx(!empty());
699 auto const sd = UNLIKELY(isProxy())
700 ? escalate(m_len + 1)
701 : reserve(m_len + 1);
702 sd->incrementHelper();
703 return sd;
706 void StringData::incrementHelper() {
707 raise_notice("Increment on string '%s'", data());
708 m_hash = 0;
710 enum class CharKind {
711 UNKNOWN,
712 LOWER_CASE,
713 UPPER_CASE,
714 NUMERIC
717 auto const len = m_len;
718 auto const s = mutableData();
719 int carry = 0;
720 int pos = len - 1;
721 auto last = CharKind::UNKNOWN; // Shut up the compiler warning
722 int ch;
724 while (pos >= 0) {
725 ch = s[pos];
726 if (ch >= 'a' && ch <= 'z') {
727 if (ch == 'z') {
728 s[pos] = 'a';
729 carry=1;
730 } else {
731 s[pos]++;
732 carry=0;
734 last = CharKind::LOWER_CASE;
735 } else if (ch >= 'A' && ch <= 'Z') {
736 if (ch == 'Z') {
737 s[pos] = 'A';
738 carry=1;
739 } else {
740 s[pos]++;
741 carry=0;
743 last = CharKind::UPPER_CASE;
744 } else if (ch >= '0' && ch <= '9') {
745 if (ch == '9') {
746 s[pos] = '0';
747 carry=1;
748 } else {
749 s[pos]++;
750 carry=0;
752 last = CharKind::NUMERIC;
753 } else {
754 carry=0;
755 break;
757 if (carry == 0) {
758 break;
760 pos--;
763 if (carry) {
764 if (UNLIKELY(len + 1 > MaxSize)) {
765 raiseStringLengthExceededError(len + 1);
768 assertx(len + 1 <= capacity());
769 memmove(s + 1, s, len);
770 s[len + 1] = '\0';
771 m_len = len + 1;
773 switch (last) {
774 case CharKind::NUMERIC:
775 s[0] = '1';
776 break;
777 case CharKind::UPPER_CASE:
778 s[0] = 'A';
779 break;
780 case CharKind::LOWER_CASE:
781 s[0] = 'a';
782 break;
783 default:
784 break;
789 void StringData::preCompute() {
790 auto s = slice();
791 m_hash = hash_string_i_unsafe(s.data(), s.size());
792 assertx(m_hash >= 0);
793 if (s.size() > 0 &&
794 (is_numeric_string(s.data(), s.size(), nullptr, nullptr,
795 1, nullptr) == KindOfNull)) {
796 m_hash |= STRHASH_MSB;
800 #if (!defined(__SSE4_2__) && !defined(ENABLE_AARCH64_CRC)) || \
801 defined(NO_HWCRC) || !defined(NO_M_DATA) || defined(_MSC_VER)
802 // This function is implemented directly in ASM in string-data-*.S otherwise.
803 NEVER_INLINE strhash_t StringData::hashHelper() const {
804 assertx(!isProxy());
805 strhash_t h = hash_string_i_unsafe(data(), m_len);
806 assertx(h >= 0);
807 m_hash |= h;
808 return h;
810 #endif
812 ///////////////////////////////////////////////////////////////////////////////
813 // type conversions
815 DataType StringData::isNumericWithVal(int64_t &lval, double &dval,
816 int allow_errors, int* overflow) const {
817 if (m_hash < 0) return KindOfNull;
818 DataType ret = KindOfNull;
819 auto s = slice();
820 if (s.size()) {
821 ret = is_numeric_string(
822 s.data(),
823 s.size(),
824 &lval,
825 &dval,
826 allow_errors,
827 overflow
829 if (ret == KindOfNull && allow_errors) {
830 // a proxy string has its hash precomputed - so it can't
831 // suddenly go from being numeric to not-numeric
832 assertx(!isProxy());
833 m_hash |= STRHASH_MSB;
836 return ret;
839 bool StringData::isNumeric() const {
840 if (m_hash < 0) return false;
841 int64_t lval; double dval;
842 DataType ret = isNumericWithVal(lval, dval, 0);
843 switch (ret) {
844 case KindOfNull:
845 return false;
846 case KindOfInt64:
847 case KindOfDouble:
848 return true;
849 case KindOfUninit:
850 case KindOfBoolean:
851 case KindOfPersistentString:
852 case KindOfString:
853 case KindOfPersistentVec:
854 case KindOfVec:
855 case KindOfPersistentDict:
856 case KindOfDict:
857 case KindOfPersistentKeyset:
858 case KindOfKeyset:
859 case KindOfObject:
860 case KindOfResource:
861 case KindOfRFunc:
862 case KindOfFunc:
863 case KindOfClass:
864 case KindOfLazyClass:
865 case KindOfClsMeth:
866 case KindOfRClsMeth:
867 case KindOfRecord:
868 break;
870 not_reached();
873 bool StringData::isInteger() const {
874 if (m_hash < 0) return false;
875 int64_t lval; double dval;
876 DataType ret = isNumericWithVal(lval, dval, 0);
877 switch (ret) {
878 case KindOfNull:
879 case KindOfDouble:
880 return false;
881 case KindOfInt64:
882 return true;
883 case KindOfUninit:
884 case KindOfBoolean:
885 case KindOfPersistentString:
886 case KindOfString:
887 case KindOfPersistentVec:
888 case KindOfVec:
889 case KindOfPersistentDict:
890 case KindOfDict:
891 case KindOfPersistentKeyset:
892 case KindOfKeyset:
893 case KindOfObject:
894 case KindOfResource:
895 case KindOfRFunc:
896 case KindOfFunc:
897 case KindOfClass:
898 case KindOfLazyClass:
899 case KindOfClsMeth:
900 case KindOfRClsMeth:
901 case KindOfRecord:
902 break;
904 not_reached();
907 bool StringData::toBoolean() const {
908 return !empty() && !isZero();
911 int64_t StringData::toInt64(int base /* = 10 */) const {
912 return strtoll(data(), nullptr, base);
915 double StringData::toDouble() const {
916 auto s = slice();
917 if (s.size()) return zend_strtod(s.data(), nullptr);
918 return 0;
921 DataType StringData::toNumeric(int64_t &lval, double &dval) const {
922 if (m_hash < 0) return KindOfString;
923 DataType ret = isNumericWithVal(lval, dval, 0);
924 if (ret == KindOfInt64 || ret == KindOfDouble) return ret;
925 return KindOfString;
928 ///////////////////////////////////////////////////////////////////////////////
929 // comparisons
931 bool StringData::equal(const StringData *s) const {
932 assertx(s);
933 if (s == this) return true;
934 int ret;
936 if (!(m_hash < 0 || s->m_hash < 0)) {
937 ret = numericCompare(s, true);
938 if (ret >= -1) {
939 return ret == 0;
942 return same(s);
945 int StringData::numericCompare(const StringData *v2, bool eq) const {
946 assertx(v2);
948 int oflow1, oflow2;
949 int64_t lval1, lval2;
950 double dval1, dval2;
951 DataType ret1, ret2;
952 if ((ret1 = isNumericWithVal(lval1, dval1, 0, &oflow1)) == KindOfNull ||
953 (ret1 == KindOfDouble && !std::isfinite(dval1)) ||
954 (ret2 = v2->isNumericWithVal(lval2, dval2, 0, &oflow2)) == KindOfNull ||
955 (ret2 == KindOfDouble && !std::isfinite(dval2))) {
956 return -2;
958 if (oflow1 && oflow1 == oflow2 && dval1 == dval2) {
959 return -2; // overflow in same direction, comparison will be inaccurate
961 if (ret1 == KindOfInt64 && ret2 == KindOfInt64) {
962 if (lval1 > lval2) return 1;
963 if (lval1 == lval2) return 0;
964 return -1;
966 if (ret1 == KindOfDouble && ret2 == KindOfDouble) {
967 if (dval1 > dval2) return 1;
968 if (dval1 == dval2) return 0;
969 return -1;
972 if (ret1 == KindOfDouble) {
973 assertx(ret2 == KindOfInt64);
974 if (oflow1) {
975 return oflow1;
977 dval2 = (double)lval2;
978 } else {
979 assertx(ret1 == KindOfInt64);
980 assertx(ret2 == KindOfDouble);
981 if (oflow2) {
982 return -oflow2;
984 dval1 = (double)lval1;
987 if (dval1 > dval2) return 1;
988 if (dval1 == dval2) {
989 if (eq) handleConvNoticeForEq("int", "float");
990 return 0;
992 return -1;
995 int StringData::compare(const StringData *v2) const {
996 assertx(v2);
998 if (v2 == this) return 0;
1000 int ret = numericCompare(v2, false);
1001 if (ret < -1) {
1002 int len1 = size();
1003 int len2 = v2->size();
1004 int len = len1 < len2 ? len1 : len2;
1005 ret = memcmp(data(), v2->data(), len);
1006 if (ret) return ret;
1007 if (len1 == len2) return 0;
1008 return len < len1 ? 1 : -1;
1010 return ret;
1013 StringData*
1014 StringData::substr(int start, int length /* = StringData::MaxSize */) {
1015 if (start < 0 || start >= size() || length <= 0) {
1016 return staticEmptyString();
1019 auto const max_len = size() - start;
1020 if (length > max_len) {
1021 length = max_len;
1024 assertx(length > 0);
1025 if (UNLIKELY(length == size())) {
1026 incRefCount();
1027 return this;
1029 if (UNLIKELY(length == 1)) {
1030 return makeStaticString(data()[start]);
1032 return StringData::Make(data() + start, length, CopyString);
1035 ///////////////////////////////////////////////////////////////////////////////
1036 // Debug
1038 std::string StringData::toCppString() const {
1039 auto s = slice();
1040 return std::string(s.data(), s.size());
1043 bool StringData::checkSane() const {
1044 static_assert(sizeof(StringData) == (use_lowptr ? 16 : 24),
1045 "StringData size changed---update assertion if you mean it");
1046 static_assert(size_t(MaxSize) <= size_t(INT_MAX), "Beware int wraparound");
1047 #ifdef NO_M_DATA
1048 static_assert(sizeof(StringData) == SD_DATA, "");
1049 static_assert(offsetof(StringData, m_len) == SD_LEN, "");
1050 static_assert(offsetof(StringData, m_hash) == SD_HASH, "");
1051 #endif
1052 assertx(kindIsValid());
1053 assertx(uint32_t(size()) <= MaxSize);
1054 assertx(size() >= 0);
1055 assertx(IMPLIES(isSymbol(), isStatic()));
1056 if (!isImmutable()) {
1057 assertx(size() <= capacity());
1058 assertx(capacity() <= MaxSize);
1060 // isFlat() and isProxy() both check whether m_data == payload(),
1061 // which guarantees by definition that isFlat() != isProxy()
1062 return true;
1065 //////////////////////////////////////////////////////////////////////