2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/base/string-data.h"
22 #include "hphp/util/alloc.h"
23 #include "hphp/util/safe-cast.h"
24 #include "hphp/util/stacktrace-profiler.h"
26 #include "hphp/runtime/base/apc-handle-defs.h"
27 #include "hphp/runtime/base/apc-string.h"
28 #include "hphp/runtime/base/builtin-functions.h"
29 #include "hphp/runtime/base/exceptions.h"
30 #include "hphp/runtime/base/runtime-error.h"
31 #include "hphp/runtime/base/runtime-option.h"
32 #include "hphp/runtime/base/tv-uncounted.h"
33 #include "hphp/runtime/base/zend-functions.h"
34 #include "hphp/runtime/base/zend-string.h"
35 #include "hphp/runtime/ext/apc/ext_apc.h"
37 #include "hphp/zend/zend-strtod.h"
41 //////////////////////////////////////////////////////////////////////
// Report that a requested string length `len` is larger than
// StringData::MaxSize. Calls raise_error() with both values in the message.
// Marked NEVER_INLINE; presumably to keep this cold path out of the callers
// (TODO confirm).
43 NEVER_INLINE
void raiseStringLengthExceededError(size_t len
) {
44 raise_error("String length exceeded: %zu > %u", len
, StringData::MaxSize
);
// Allocate a flat StringData from tl_heap with room for `len` characters.
//  - Raises the length-exceeded error when `len > StringData::MaxSize`.
//  - The allocation size class is computed from `len + kStringOverhead`.
//  - The header is initialized as HeaderKind::String with OneReference, and
//    the size index is passed as the 16-bit aux value.
//  - m_data is pointed at the bytes directly after the header (`sd + 1`).
// The caller is responsible for m_lenAndHash (see the original note below).
47 // Allocate, initialize `m_data' and HeapObject, but not `m_lenAndHash'.
48 ALWAYS_INLINE StringData
* allocFlat(size_t len
) {
49 if (UNLIKELY(len
> StringData::MaxSize
)) {
50 raiseStringLengthExceededError(len
);
52 auto const sizeIndex
= MemoryManager::size2Index(len
+ kStringOverhead
);
53 auto sd
= static_cast<StringData
*>(tl_heap
->objMallocIndex(sizeIndex
));
54 // Refcount initialized to 1.
55 sd
->initHeader_16(HeaderKind::String
, OneReference
, sizeIndex
);
56 assertx(sd
->capacity() >= len
);
58 sd
->m_data
= reinterpret_cast<char*>(sd
+ 1);
63 //////////////////////////////////////////////////////////////////////
68 >::type s_theEmptyString
;
70 //////////////////////////////////////////////////////////////////////
// Flag stored as true (release) by StringData::markSymbolsLoaded(). The
// shared-string helpers in this file read it with an acquire load and only
// treat a static string as a symbol while it is still false.
73 std::atomic
<bool> s_symbols_loaded
;
// Return the SymbolPrefix located immediately before `sd` in memory
// (pointer to `sd` reinterpreted as SymbolPrefix*, minus one element).
// Asserts that `sd` is a symbol.
75 SymbolPrefix
* getSymbolPrefix(StringData
* sd
) {
76 assertx(sd
->isSymbol());
77 return reinterpret_cast<SymbolPrefix
*>(sd
) - 1;
// Const overload: asserts `sd` is a symbol, then forwards to the non-const
// getSymbolPrefix() through a const_cast and returns the result as const.
79 const SymbolPrefix
* getSymbolPrefix(const StringData
* sd
) {
80 assertx(sd
->isSymbol());
81 return getSymbolPrefix(const_cast<StringData
*>(sd
));
// True when kIsSymbolMask is set in m_aux16 after shifting it right by 8
// bits, i.e. the symbol flag is kept above the low 8 bits of m_aux16.
85 bool StringData::isSymbol() const {
86 return (m_aux16
>> 8) & kIsSymbolMask
;
// Record (release store) that symbols have been loaded. Once this is set, the
// `symbol` condition in AllocateShared/MakeSharedAt evaluates to false, so no
// SymbolPrefix is reserved for strings created afterwards.
89 void StringData::markSymbolsLoaded() {
90 s_symbols_loaded
.store(true, std::memory_order_release
);
// Return the `cls` pointer held in this string's SymbolPrefix.
// getSymbolPrefix() asserts that this string is a symbol.
93 Class
* StringData::getCachedClass() const {
94 return getSymbolPrefix(this)->cls
;
// Return the `ne` pointer held in this string's SymbolPrefix.
// getSymbolPrefix() asserts that this string is a symbol.
97 NamedEntity
* StringData::getNamedEntity() const {
98 return getSymbolPrefix(this)->ne
;
// Setter for the `cls` slot of this symbol's SymbolPrefix. Asserts that if a
// class is already present in the prefix it is the same pointer as `cls`.
// NOTE(review): the store into prefix->cls is not visible in this listing.
101 void StringData::setCachedClass(Class
* cls
) {
102 auto const prefix
= getSymbolPrefix(this);
103 assertx(IMPLIES(prefix
->cls
, prefix
->cls
== cls
));
// Setter for the `ne` slot of this symbol's SymbolPrefix. Asserts that if a
// named entity is already present in the prefix it is the same pointer as
// `ne`.
// NOTE(review): the store into prefix->ne is not visible in this listing.
107 void StringData::setNamedEntity(NamedEntity
* ne
) {
108 auto const prefix
= getSymbolPrefix(this);
109 assertx(IMPLIES(prefix
->ne
, prefix
->ne
== ne
));
// Byte offset of m_aux16 within StringData, plus one.
// NOTE(review): presumably this addresses the byte of m_aux16 that holds the
// symbol flag tested by isSymbol() on little-endian targets -- confirm.
113 ptrdiff_t StringData::isSymbolOffset() {
114 return offsetof(StringData
, m_aux16
) + 1;
// Offset of SymbolPrefix::cls expressed relative to the end of the
// SymbolPrefix: offsetof(cls) minus sizeof(SymbolPrefix). getSymbolPrefix()
// places the prefix directly before the StringData, so this is the offset of
// `cls` relative to the StringData pointer.
117 ptrdiff_t StringData::cachedClassOffset() {
118 return offsetof(SymbolPrefix
, cls
) - sizeof(SymbolPrefix
);
121 //////////////////////////////////////////////////////////////////////
// Byte offset of m_aux16 within StringData; color() reads the color bits from
// that field.
123 ptrdiff_t StringData::colorOffset() {
124 return offsetof(StringData
, m_aux16
);
// Return the color of this string: the bits of m_aux16 selected by
// kColorMask.
127 uint16_t StringData::color() const {
128 return m_aux16
& kColorMask
;
// Setter for this string's color. Asserts that `color` has no bits set
// outside kColorMask.
// NOTE(review): the update of m_aux16 is not visible in this listing.
131 void StringData::setColor(uint16_t color
) {
132 assertx((color
& ~kColorMask
) == 0);
136 //////////////////////////////////////////////////////////////////////
138 // Create either a static or an uncounted string.
139 // The difference between static and uncounted strings is their lifetime.
140 // Static strings are alive for the lifetime of the process.
141 // Uncounted strings are not ref counted but will be deleted at some point.
// Allocate raw storage for a static (trueStatic == true) or uncounted string
// that will hold the bytes of `sl`.
//  - Raises the length-exceeded error when sl.size() > StringData::MaxSize.
//  - `symbol` is true only for static strings requested before
//    s_symbols_loaded has been set; such strings get sizeof(SymbolPrefix)
//    extra bytes.
//  - Memory comes from static_alloc() for static strings and from
//    AllocUncounted() otherwise.
// Returns a MemBlock{ptr, bytes} describing the allocation.
142 template <bool trueStatic
> ALWAYS_INLINE
143 MemBlock
StringData::AllocateShared(folly::StringPiece sl
) {
144 if (UNLIKELY(sl
.size() > StringData::MaxSize
)) {
145 raiseStringLengthExceededError(sl
.size());
149 trueStatic
&& !s_symbols_loaded
.load(std::memory_order_acquire
);
151 auto const extra
= symbol
? sizeof(SymbolPrefix
) : 0;
152 auto const bytes
= sl
.size() + kStringOverhead
+ extra
;
153 auto const ptr
= trueStatic
? static_alloc(bytes
) : AllocUncounted(bytes
);
154 return MemBlock
{ptr
, bytes
};
// Construct a static (trueStatic == true) or uncounted StringData holding a
// copy of `sl` inside the caller-supplied memory `range`.
//  - `range` must be at least sl.size() + kStringOverhead bytes (asserted).
//  - The string is a symbol only if it is static, s_symbols_loaded is still
//    false, and `range` also has room for a SymbolPrefix. In that case the
//    StringData begins sizeof(SymbolPrefix) bytes into `range`, the header aux
//    value carries kIsSymbolMask << 8, and the prefix's cls/ne are nulled.
//  - Otherwise the header aux value is just kInvalidColor.
//  - The refcount is StaticValue or UncountedValue depending on trueStatic.
//  - Characters are copied with memcpy; preCompute() then fills in m_hash.
// NOTE(review): the branch structure selecting between the two initHeader_16
// calls, and the final return, are not visible in this listing.
157 template <bool trueStatic
> ALWAYS_INLINE
158 StringData
* StringData::MakeSharedAt(folly::StringPiece sl
, MemBlock range
) {
159 assertx(range
.size
>= sl
.size() + kStringOverhead
);
160 auto const symbol
= trueStatic
&&
161 !s_symbols_loaded
.load(std::memory_order_acquire
) &&
162 (range
.size
>= sl
.size() + kStringOverhead
+ sizeof(SymbolPrefix
));
163 auto const extra
= symbol
? sizeof(SymbolPrefix
) : 0;
164 StringData
* sd
= reinterpret_cast<StringData
*>(
165 reinterpret_cast<uintptr_t>(range
.ptr
) + extra
167 auto const data
= reinterpret_cast<char*>(sd
+ 1);
172 auto const count
= trueStatic
? StaticValue
: UncountedValue
;
174 auto constexpr aux
= kIsSymbolMask
<< 8 | kInvalidColor
;
175 sd
->initHeader_16(HeaderKind::String
, count
, aux
);
176 getSymbolPrefix(sd
)->cls
= nullptr;
177 getSymbolPrefix(sd
)->ne
= nullptr;
179 sd
->initHeader_16(HeaderKind::String
, count
, kInvalidColor
);
181 sd
->m_len
= sl
.size(); // m_hash is computed soon.
184 auto const mcret
= memcpy(data
, sl
.data(), sl
.size());
185 auto const ret
= reinterpret_cast<StringData
*>(mcret
) - 1;
186 // Recalculating ret from mcret avoids a spill.
187 ret
->preCompute(); // get m_hash right
190 assertx(ret
->isFlat());
191 assertx(trueStatic
? ret
->isStatic() : ret
->isUncounted());
192 assertx(ret
->isSymbol() == symbol
);
193 assertx(ret
->checkSane());
// Build a static string for `sl` in the caller-provided memory `range`.
// Thin wrapper over MakeSharedAt<true>.
197 StringData
* StringData::MakeStaticAt(folly::StringPiece sl
, MemBlock range
) {
198 return MakeSharedAt
<true>(sl
, range
);
// Allocate storage with AllocateShared<true>() and build a static string for
// `sl` in it. Asserts StaticString::s_globalInit.
201 StringData
* StringData::MakeStatic(folly::StringPiece sl
) {
202 assertx(StaticString::s_globalInit
);
203 return MakeStaticAt(sl
, AllocateShared
<true>(sl
));
// Allocate storage with AllocateShared<false>() and build an uncounted string
// for `sl` in it via MakeSharedAt<false>().
206 StringData
* StringData::MakeUncounted(folly::StringPiece sl
) {
207 return MakeSharedAt
<false>(sl
, AllocateShared
<false>(sl
));
// Build the static empty string ("") in place, using the storage of
// s_theEmptyString as the memory block.
210 StringData
* StringData::MakeEmpty() {
211 return MakeStaticAt(folly::StringPiece
{""},
212 MemBlock
{&s_theEmptyString
, sizeof(s_theEmptyString
)});
// Release the storage of a static string through static_try_free(). Asserts
// the string is sane and static. Two frees are visible: one starting at the
// SymbolPrefix that precedes the string (size includes sizeof(SymbolPrefix)),
// and one covering only the StringData (size() + kStringOverhead).
// NOTE(review): the condition choosing between them is not visible in this
// listing; presumably isSymbol() -- confirm.
215 void StringData::destructStatic() {
216 assertx(checkSane() && isStatic());
219 static_try_free(reinterpret_cast<SymbolPrefix
*>(this) - 1,
220 size() + kStringOverhead
+ sizeof(SymbolPrefix
));
222 static_try_free(this, size() + kStringOverhead
);
// Free an uncounted string with FreeUncounted(), passing
// size() + kStringOverhead as the byte count. Asserts that `str` is flat and
// sane, and that uncountedCowCheck() is false.
226 void StringData::ReleaseUncounted(StringData
* str
) {
227 assertx(str
->isFlat());
228 assertx(str
->checkSane());
229 assertx(!str
->uncountedCowCheck());
230 FreeUncounted(str
, str
->size() + kStringOverhead
);
233 //////////////////////////////////////////////////////////////////////
// Operates on the list node stored in this string's proxy payload: reads the
// node's next/prev pointers and asserts neither equals kMallocFreeWord.
// NOTE(review): judging by the name this unlinks the node from its list, but
// the relinking statements are not visible in this listing.
235 ALWAYS_INLINE
void StringData::delist() {
237 auto& payload
= *proxy();
238 auto const next
= payload
.node
.next
;
239 auto const prev
= payload
.node
.prev
;
240 assertx(uintptr_t(next
) != kMallocFreeWord
);
241 assertx(uintptr_t(prev
) != kMallocFreeWord
);
// Walk the string list owned by tl_heap from head.next until the head is
// reached again. For every node, the owning StringData (node2str) is asserted
// to be a proxy and unreference() is called on its APCString. Afterwards the
// head is reset so next and prev both point at the head itself.
// NOTE(review): the unsigned return value (presumably a count of swept
// strings) is computed on lines not visible in this listing.
246 unsigned StringData::sweepAll() {
247 auto& head
= tl_heap
->getStringList();
249 for (StringDataNode
*next
, *n
= head
.next
; n
!= &head
; n
= next
) {
252 assertx(next
&& uintptr_t(next
) != kSmallFreeWord
);
253 assertx(next
&& uintptr_t(next
) != kMallocFreeWord
);
254 auto const s
= node2str(n
);
255 assertx(s
->isProxy());
256 s
->proxy()->apcstr
->unreference();
258 head
.next
= head
.prev
= &head
;
262 //////////////////////////////////////////////////////////////////////
// Create a flat copy of `s`. m_lenAndHash is copied wholesale, so the new
// string carries over both the length and whatever hash `s` has stored. The
// characters are copied with memcpy8() and a 0 byte is stored through the
// pointer memcpy8() returns (the terminator). Asserts the copy is same() as
// `s`.
264 StringData
* StringData::Make(const StringData
* s
, CopyStringMode
) {
265 auto const sd
= allocFlat(s
->m_len
);
266 sd
->m_lenAndHash
= s
->m_lenAndHash
;
267 auto const data
= static_cast<void*>(sd
+ 1);
268 *memcpy8(data
, s
->data(), s
->m_len
) = 0;
270 assertx(sd
->same(s
));
// Create a flat string holding a copy of `sl`. m_lenAndHash is assigned the
// length, which leaves the hash at 0 (asserted below). The result pointer
// `ret` is recomputed from memcpy()'s return value instead of reusing `sd`.
// The postconditions asserted are: matching length, exactly one reference,
// zero hash, flat, and sane.
274 StringData
* StringData::Make(folly::StringPiece sl
, CopyStringMode
) {
275 auto const sd
= allocFlat(sl
.size());
276 sd
->m_lenAndHash
= sl
.size(); // hash=0
277 auto const data
= reinterpret_cast<char*>(sd
+ 1);
280 auto const mcret
= memcpy(data
, sl
.data(), sl
.size());
281 auto const ret
= reinterpret_cast<StringData
*>(mcret
) - 1;
282 // Recalculating ret from mcret avoids a spill.
285 assertx(ret
->m_len
== sl
.size());
286 assertx(ret
->hasExactlyOneRef());
287 assertx(ret
->m_hash
== 0);
288 assertx(ret
->isFlat());
289 assertx(ret
->checkSane());
// Copy-mode construction from a raw pointer and length. Raises the
// length-exceeded error when `len > StringData::MaxSize`, then forwards to
// the folly::StringPiece overload.
293 StringData
* StringData::Make(const char* data
, size_t len
, CopyStringMode
) {
294 if (UNLIKELY(len
> StringData::MaxSize
)) {
295 raiseStringLengthExceededError(len
);
298 return Make(folly::StringPiece(data
, len
), CopyString
);
// Allocate a flat string via allocFlat(reserveLen). Asserts the result has
// exactly one reference, is flat, and is sane.
// NOTE(review): initialization of the length/terminator is on lines not
// visible in this listing.
301 StringData
* StringData::Make(size_t reserveLen
) {
302 auto const sd
= allocFlat(reserveLen
);
305 assertx(sd
->hasExactlyOneRef());
306 assertx(sd
->isFlat());
307 assertx(sd
->checkSane());
// Convenience overload: equivalent to Make(SmallStringReserve).
311 StringData
* StringData::Make() {
312 return Make(SmallStringReserve
);
315 //////////////////////////////////////////////////////////////////////
// Attach-mode construction from a raw buffer. Raises the length-exceeded
// error when `len > StringData::MaxSize`, then builds the string by copying
// `data` through the folly::StringPiece / CopyString overload.
// NOTE(review): what is done with the caller's `data` buffer afterwards is
// not visible in this listing -- confirm ownership handling.
317 StringData
* StringData::Make(char* data
, size_t len
, AttachStringMode
) {
318 if (UNLIKELY(len
> StringData::MaxSize
)) {
319 raiseStringLengthExceededError(len
);
321 auto const sd
= Make(folly::StringPiece(data
, len
), CopyString
);
323 assertx(sd
->checkSane());
// Create a flat string containing r1 followed by r2. Both pieces must have
// non-null data() (asserted). The length is stored in m_lenAndHash with the
// hash left at 0; r1 is copied to the start of the buffer and r2 directly
// after it.
327 StringData
* StringData::Make(folly::StringPiece r1
, folly::StringPiece r2
) {
328 // Undefined behavior if we pass nullptr strings into StringData::Make
329 assertx(r1
.data() && r2
.data());
330 auto const len
= r1
.size() + r2
.size();
331 auto const sd
= allocFlat(len
);
332 sd
->m_lenAndHash
= len
; // hash=0
334 auto const data
= reinterpret_cast<char*>(sd
+ 1);
335 memcpy(data
, r1
.data(), r1
.size());
336 memcpy(data
+ r1
.size(), r2
.data(), r2
.size());
339 assertx(sd
->hasExactlyOneRef());
340 assertx(sd
->isFlat());
341 assertx(sd
->checkSane());
// Create a flat string containing the characters of s1 followed by those of
// s2. The buffer is allocated 6 bytes larger than the combined length because
// memcpy8() may write past the requested size (see the original comment
// below). The second memcpy8() starts at the pointer returned by the first,
// and a 0 byte is stored through the pointer returned by the second.
345 StringData
* StringData::Make(const StringData
* s1
, const StringData
* s2
) {
346 auto const len
= s1
->m_len
+ s2
->m_len
;
347 // `memcpy8()' could overrun the buffer by at most 7 bytes, so we allocate 6
348 // more bytes here, which (together with the trailing 0) makes it safe.
349 auto const sd
= allocFlat(len
+ 6);
350 sd
->m_lenAndHash
= len
; // hash=0
352 auto const data
= reinterpret_cast<char*>(sd
+ 1);
353 auto const next
= memcpy8(data
, s1
->data(), s1
->m_len
);
354 *memcpy8(next
, s2
->data(), s2
->m_len
) = 0;
356 assertx(sd
->hasExactlyOneRef());
357 assertx(sd
->isFlat());
358 assertx(sd
->checkSane());
// Concatenate `s1` with the NUL-terminated C string `lit2`; its length is
// taken with strlen() and the work is done by the two-StringPiece overload.
362 StringData
* StringData::Make(folly::StringPiece s1
, const char* lit2
) {
363 return Make(s1
, folly::StringPiece(lit2
, strlen(lit2
)));
// Create a flat string containing r1, r2 and r3 back to back. The length is
// stored in m_lenAndHash with the hash left at 0. memcpy() returns its
// destination pointer, so each step re-bases `p` on the previous destination
// and adds the previous piece's size to find where the next piece goes.
366 StringData
* StringData::Make(folly::StringPiece r1
, folly::StringPiece r2
,
367 folly::StringPiece r3
) {
368 auto const len
= r1
.size() + r2
.size() + r3
.size();
369 auto const sd
= allocFlat(len
);
370 sd
->m_lenAndHash
= len
; // hash=0
372 auto p
= reinterpret_cast<char*>(sd
+ 1);
373 p
= static_cast<char*>(memcpy(p
, r1
.data(), r1
.size()));
374 p
= static_cast<char*>(memcpy(p
+ r1
.size(), r2
.data(), r2
.size()));
375 p
= static_cast<char*>(memcpy(p
+ r2
.size(), r3
.data(), r3
.size()));
378 assertx(sd
->hasExactlyOneRef());
379 assertx(sd
->isFlat());
380 assertx(sd
->checkSane());
// Create a flat string containing r1, r2, r3 and r4 back to back. The length
// is stored in m_lenAndHash with the hash left at 0. memcpy() returns its
// destination pointer, so each step re-bases `p` on the previous destination
// and adds the previous piece's size to find where the next piece goes.
384 StringData
* StringData::Make(folly::StringPiece r1
, folly::StringPiece r2
,
385 folly::StringPiece r3
, folly::StringPiece r4
) {
386 auto const len
= r1
.size() + r2
.size() + r3
.size() + r4
.size();
387 auto const sd
= allocFlat(len
);
388 sd
->m_lenAndHash
= len
; // hash=0
390 auto p
= reinterpret_cast<char*>(sd
+ 1);
391 p
= static_cast<char*>(memcpy(p
, r1
.data(), r1
.size()));
392 p
= static_cast<char*>(memcpy(p
+ r1
.size(), r2
.data(), r2
.size()));
393 p
= static_cast<char*>(memcpy(p
+ r2
.size(), r3
.data(), r3
.size()));
394 p
= static_cast<char*>(memcpy(p
+ r3
.size(), r4
.data(), r4
.size()));
397 assertx(sd
->hasExactlyOneRef());
398 assertx(sd
->isFlat());
399 assertx(sd
->checkSane());
403 //////////////////////////////////////////////////////////////////////
// Link this proxy string's node into tl_heap's string list directly after the
// list head: the node's next becomes the old head.next, its prev becomes the
// head, and both head.next and the old first node's prev are pointed at it.
// Asserts the old head.next is not kMallocFreeWord.
405 ALWAYS_INLINE
void StringData::enlist() {
407 auto& head
= tl_heap
->getStringList();
409 auto const next
= head
.next
;
410 auto& payload
= *proxy();
411 assertx(uintptr_t(next
) != kMallocFreeWord
);
412 payload
.node
.next
= next
;
413 payload
.node
.prev
= &head
;
414 next
->prev
= head
.next
= &payload
.node
;
// Create a proxy StringData for `apcstr`: instead of copying characters, the
// new string's m_data points at the character data of the APCString's
// StringData. The object is allocated from tl_heap with room for a Proxy
// payload after the header; the header is initialized from the source string
// with OneReference, m_lenAndHash is copied, and the payload records `apcstr`.
// Asserts apcExtension::UseUncounted is false.
// NOTE(review): the leading always_assert(false) is presumably under a
// build-configuration guard that is not visible in this listing -- confirm.
// The list insertion / reference-taking steps are also not visible here.
417 StringData
* StringData::MakeProxy(const APCString
* apcstr
) {
419 always_assert(false);
422 assertx(!apcExtension::UseUncounted
);
423 // No need to check if len > MaxSize, because if it were we'd never
424 // have made the StringData in the APCVariant without throwing.
425 assertx(size_t(apcstr
->getStringData()->size()) <= size_t(MaxSize
));
427 auto const sd
= static_cast<StringData
*>(
428 tl_heap
->mallocSmallSize(sizeof(StringData
) + sizeof(Proxy
))
430 auto const data
= apcstr
->getStringData();
431 sd
->m_data
= const_cast<char*>(data
->m_data
);
432 sd
->initHeader(*data
, OneReference
);
433 sd
->m_lenAndHash
= data
->m_lenAndHash
;
434 sd
->proxy()->apcstr
= apcstr
;
438 assertx(sd
->m_len
== data
->size());
439 assertx(sd
->m_aux16
== data
->m_aux16
);
440 assertx(sd
->m_kind
== HeaderKind::String
);
441 assertx(sd
->hasExactlyOneRef());
442 assertx(sd
->m_hash
== data
->m_hash
);
443 assertx(sd
->isProxy());
444 assertx(sd
->checkSane());
// Drop this proxy's hold on its APCString by calling unreference() on it.
// NOTE(review): other statements of this function are not visible in this
// listing.
449 void StringData::unProxy() {
451 proxy()->apcstr
->unreference();
// Return this proxy string's memory to tl_heap. The size passed,
// sizeof(StringData) + sizeof(Proxy), matches the size allocated in
// MakeProxy().
// NOTE(review): other statements of this function are not visible in this
// listing.
456 void StringData::releaseProxy() {
458 tl_heap
->freeSmallSize(this, sizeof(StringData
) + sizeof(Proxy
));
// Free this ref-counted string. After fixCountForRelease() it asserts the
// string is ref-counted and sane. Non-flat strings take an UNLIKELY branch
// whose body is not visible in this listing. Flat strings are handed back to
// tl_heap with objFreeIndex(), using m_aux16 as the size index (allocFlat()
// passes the size index as the header's 16-bit aux value).
461 void StringData::release() noexcept
{
462 fixCountForRelease();
463 assertx(isRefCounted());
464 assertx(checkSane());
465 if (UNLIKELY(!isFlat())) {
467 AARCH64_WALKABLE_FRAME();
470 tl_heap
->objFreeIndex(this, m_aux16
);
471 AARCH64_WALKABLE_FRAME();
474 //////////////////////////////////////////////////////////////////////
// Debug-only precondition used by the append() overloads. It asserts that
// `ptr` is either at/before data() or beyond this string's buffer
// (data() + capacity() + 1), i.e. it is not a pointer into the interior; and
// that when `ptr` is exactly data(), `len` does not exceed m_len.
476 #define ALIASING_APPEND_ASSERT(ptr, len) \
477 assertx(uintptr_t(ptr) <= uintptr_t(data()) || \
478 uintptr_t(ptr) >= uintptr_t(data() + capacity() + 1)); \
479 assertx(ptr != data() || len <= m_len);
// Append `range` to this string, which must be mutable and not shared
// (asserted).
//  - An empty `range` returns `this` unchanged.
//  - Raises the length-exceeded error when m_len + range.size() > MaxSize.
//  - The destination `target` comes from escalate() when this is a proxy and
//    from reserve() otherwise; either may hand back a different StringData.
//  - The bytes are copied to target->mutableData() + m_len and the size is set
//    to the new length.
// NOTE(review): the final return statement is not visible in this listing.
481 StringData
* StringData::append(folly::StringPiece range
) {
482 assertx(!isImmutable() && !hasMultipleRefs());
484 auto s
= range
.data();
485 auto const len
= range
.size();
486 if (len
== 0) return this;
487 auto const newLen
= size_t(m_len
) + size_t(len
);
489 if (UNLIKELY(newLen
> MaxSize
)) {
490 raiseStringLengthExceededError(newLen
);
494 * We may have an aliasing append. We don't allow appending with an
495 * interior pointer, although we may be asked to append less than
496 * the whole string in an aliasing situation.
498 ALIASING_APPEND_ASSERT(s
, len
);
500 auto const requestLen
= static_cast<uint32_t>(newLen
);
501 auto const target
= UNLIKELY(isProxy()) ? escalate(requestLen
)
502 : reserve(requestLen
);
503 memcpy(target
->mutableData() + m_len
, s
, len
);
504 target
->setSize(newLen
);
505 assertx(target
->checkSane());
// Append r1 followed by r2 to this string, which must be mutable and not
// shared (asserted).
//  - If both pieces are empty, returns `this` unchanged.
//  - Raises the length-exceeded error when the combined length would exceed
//    MaxSize.
//  - Each piece is checked with ALIASING_APPEND_ASSERT.
//  - The pieces are copied back to back starting at offset m_len of the
//    target's buffer; memcpy() returns its destination, so the second copy is
//    placed r1.size() bytes after the first. The size is then set to newLen.
// NOTE(review): the non-proxy arm of the `target` expression and the final
// return are not visible in this listing.
510 StringData
* StringData::append(folly::StringPiece r1
, folly::StringPiece r2
) {
511 assertx(!isImmutable() && !hasMultipleRefs());
513 auto const len
= r1
.size() + r2
.size();
515 if (len
== 0) return this;
516 if (UNLIKELY(size_t(m_len
) + size_t(len
) > MaxSize
)) {
517 raiseStringLengthExceededError(size_t(len
) + size_t(m_len
));
520 auto const newLen
= m_len
+ len
;
523 * We may have an aliasing append. We don't allow appending with an
524 * interior pointer, although we may be asked to append less than
525 * the whole string in an aliasing situation.
527 ALIASING_APPEND_ASSERT(r1
.data(), r1
.size());
528 ALIASING_APPEND_ASSERT(r2
.data(), r2
.size());
530 auto const target
= UNLIKELY(isProxy()) ? escalate(newLen
)
534 * memcpy is safe even if it's a self append---the regions will be
535 * disjoint, since rN.data() can't point past the start of our source
536 * pointer, and rN.size() is smaller than the old length.
538 void* p
= target
->mutableData();
539 p
= memcpy((char*)p
+ m_len
, r1
.data(), r1
.size());
540 memcpy((char*)p
+ r1
.size(), r2
.data(), r2
.size());
542 target
->setSize(newLen
);
543 assertx(target
->checkSane());
// Append r1, r2 and r3 (in that order) to this string, which must be mutable
// and not shared (asserted).
//  - If all pieces are empty, returns `this` unchanged.
//  - Raises the length-exceeded error when the combined length would exceed
//    MaxSize.
//  - Each piece is checked with ALIASING_APPEND_ASSERT.
//  - The pieces are copied back to back starting at offset m_len of the
//    target's buffer; memcpy() returns its destination, so each later copy is
//    placed after the previous piece. The size is then set to newLen.
// NOTE(review): the non-proxy arm of the `target` expression and the final
// return are not visible in this listing.
548 StringData
* StringData::append(folly::StringPiece r1
,
549 folly::StringPiece r2
,
550 folly::StringPiece r3
) {
551 assertx(!isImmutable() && !hasMultipleRefs());
553 auto const len
= r1
.size() + r2
.size() + r3
.size();
555 if (len
== 0) return this;
556 if (UNLIKELY(size_t(m_len
) + size_t(len
) > MaxSize
)) {
557 raiseStringLengthExceededError(size_t(len
) + size_t(m_len
));
560 auto const newLen
= m_len
+ len
;
563 * We may have an aliasing append. We don't allow appending with an
564 * interior pointer, although we may be asked to append less than
565 * the whole string in an aliasing situation.
567 ALIASING_APPEND_ASSERT(r1
.data(), r1
.size());
568 ALIASING_APPEND_ASSERT(r2
.data(), r2
.size());
569 ALIASING_APPEND_ASSERT(r3
.data(), r3
.size());
571 auto const target
= UNLIKELY(isProxy()) ? escalate(newLen
)
575 * memcpy is safe even if it's a self append---the regions will be
576 * disjoint, since rN.data() can't point past the start of our source
577 * pointer, and rN.size() is smaller than the old length.
579 void* p
= target
->mutableData();
580 p
= memcpy((char*)p
+ m_len
, r1
.data(), r1
.size());
581 p
= memcpy((char*)p
+ r1
.size(), r2
.data(), r2
.size());
582 memcpy((char*)p
+ r2
.size(), r3
.data(), r3
.size());
584 target
->setSize(newLen
);
585 assertx(target
->checkSane());
590 #undef ALIASING_APPEND_ASSERT
592 //////////////////////////////////////////////////////////////////////
// Make sure there is capacity for at least `cap` characters. The string must
// be mutable and not shared (asserted).
//  - Returns `this` when the current capacity already suffices.
//  - Otherwise the request is grown by a quarter (cap + cap / 4), clamped to
//    MaxSize, and a new flat string is allocated.
//  - The contents are copied in 16-byte groups with memcpy16_inline(). Two
//    variants are visible, one per object layout described in the comments
//    below.
// NOTE(review): the preprocessor condition selecting the layout and the final
// return are not visible in this listing.
594 StringData
* StringData::reserve(size_t cap
) {
595 assertx(!isImmutable() && !hasMultipleRefs());
598 if (cap
<= capacity()) return this;
600 cap
= std::min(cap
+ cap
/ 4, size_t(MaxSize
));
601 auto const sd
= allocFlat(cap
);
603 // Request-allocated StringData are always aligned at 16 bytes, thus it is
604 // safe to copy in 16-byte groups.
606 // layout: [header][m_lenAndHash][...data]
607 sd
->m_lenAndHash
= m_lenAndHash
;
608 // This copies the characters (m_len bytes), and the trailing zero (1 byte)
609 memcpy16_inline(sd
+1, this+1, (m_len
+ 1 + 15) & ~0xF);
610 assertx(reinterpret_cast<uintptr_t>(this+1) % 16 == 0);
612 // layout: [header][m_data][m_lenAndHash][...data]
613 // This copies m_lenAndHash (8 bytes), the characters (m_len bytes),
614 // and the trailing zero (1 byte).
615 memcpy16_inline(&sd
->m_lenAndHash
, &m_lenAndHash
,
616 (m_len
+ 8 + 1 + 15) & ~0xF);
617 assertx(reinterpret_cast<uintptr_t>(&m_lenAndHash
) + 8 ==
618 reinterpret_cast<uintptr_t>(m_data
));
619 assertx(reinterpret_cast<uintptr_t>(&m_lenAndHash
) % 16 == 0);
622 assertx(sd
->hasExactlyOneRef());
623 assertx(sd
->isFlat());
624 assertx(sd
->checkSane());
// Copy the first `len` characters of this string into a newly allocated flat
// string sized for `len`. m_lenAndHash of the new string is assigned `len`,
// the characters are copied with memcpy8(), and a 0 byte is stored through
// the pointer memcpy8() returns. Requires a mutable, unshared string and
// `len <= capacity()` (asserted).
628 StringData
* StringData::shrinkImpl(size_t len
) {
629 assertx(!isImmutable() && !hasMultipleRefs());
631 assertx(len
<= capacity());
633 auto const sd
= allocFlat(len
);
634 sd
->m_lenAndHash
= len
;
635 auto const src
= static_cast<void*>(this + 1);
636 auto const dst
= static_cast<void*>(sd
+ 1);
637 *memcpy8(dst
, src
, len
) = 0;
639 assertx(sd
->checkSane());
// Shrink this mutable, unshared string to `len` characters. When the unused
// capacity (capacity() - len) exceeds kMinShrinkThreshold the work is
// delegated to shrinkImpl(), which reallocates.
// NOTE(review): the remaining path only shows an assertion that
// `len < MaxSize`; the rest of it is not visible in this listing.
643 StringData
* StringData::shrink(size_t len
) {
644 assertx(!isImmutable() && !hasMultipleRefs());
645 if (capacity() - len
> kMinShrinkThreshold
) {
646 return shrinkImpl(len
);
648 assertx(len
< MaxSize
);
// Produce a flat string with capacity for `cap` characters from this proxy
// string. Requires that this is a proxy, is not static, and that
// `cap >= m_len` (asserted). m_lenAndHash is copied to the new string, the
// characters are copied with memcpy8(), and a 0 byte is stored through the
// pointer memcpy8() returns.
653 // State transition from Mode::Shared to Mode::Flat.
654 StringData
* StringData::escalate(size_t cap
) {
655 assertx(isProxy() && !isStatic() && cap
>= m_len
);
657 auto const sd
= allocFlat(cap
);
658 sd
->m_lenAndHash
= m_lenAndHash
;
659 auto const sd_data
= reinterpret_cast<char*>(sd
+ 1);
660 *memcpy8(sd_data
, data(), m_len
) = 0;
662 assertx(sd
->hasExactlyOneRef());
663 assertx(sd
->isFlat());
664 assertx(sd
->checkSane());
// Debug helper that prints this string with printf(): first m_count, the
// "proxy "/"static "/"uncounted " flags that apply, and the length, then the
// characters one by one. Some characters are printed as a \xNN hex escape.
// NOTE(review): the definition of `s` and the condition deciding which
// characters are escaped are not visible in this listing.
668 void StringData::dump() const {
671 printf("StringData(%d) (%s%s%s%d): [", m_count
,
672 isProxy() ? "proxy " : "",
673 isStatic() ? "static " : "",
674 isUncounted() ? "uncounted " : "",
675 static_cast<int>(s
.size()));
676 for (uint32_t i
= 0; i
< s
.size(); i
++) {
677 char ch
= s
.data()[i
];
681 printf("\\x%02x", ch
);
// Return the static one-character string for the character at `offset`.
// When `offset` is negative or not less than size(), raises the notice
// "Uninitialized string offset" and returns the static empty string.
687 StringData
* StringData::getChar(int offset
) const {
688 if (offset
>= 0 && offset
< size()) {
689 return makeStaticString(data()[offset
]);
691 raise_notice("Uninitialized string offset: %d", offset
);
692 return staticEmptyString();
// Increment this mutable, unshared string. A destination with room for one
// more character is obtained first -- escalate(m_len + 1) for a proxy,
// reserve(m_len + 1) otherwise -- and incrementHelper() is run on it.
// NOTE(review): the final return is not visible in this listing.
695 StringData
* StringData::increment() {
696 assertx(!isImmutable() && !hasMultipleRefs());
699 auto const sd
= UNLIKELY(isProxy())
700 ? escalate(m_len
+ 1)
701 : reserve(m_len
+ 1);
702 sd
->incrementHelper();
// In-place worker for increment(). Raises the notice "Increment on string"
// with the current contents, then works on the mutable buffer. Characters are
// classified as lower-case ('a'..'z'), upper-case ('A'..'Z') or numeric
// ('0'..'9'), and the kind seen is remembered in `last`. When the string has
// to grow, the new length is checked against MaxSize, the capacity is
// asserted, and the existing characters are shifted right by one with
// memmove(); a switch on CharKind follows.
// NOTE(review): the loop, the per-character arithmetic and the bodies of the
// switch cases are not visible in this listing.
706 void StringData::incrementHelper() {
707 raise_notice("Increment on string '%s'", data());
710 enum class CharKind
{
717 auto const len
= m_len
;
718 auto const s
= mutableData();
721 auto last
= CharKind::UNKNOWN
; // Shut up the compiler warning
726 if (ch
>= 'a' && ch
<= 'z') {
734 last
= CharKind::LOWER_CASE
;
735 } else if (ch
>= 'A' && ch
<= 'Z') {
743 last
= CharKind::UPPER_CASE
;
744 } else if (ch
>= '0' && ch
<= '9') {
752 last
= CharKind::NUMERIC
;
764 if (UNLIKELY(len
+ 1 > MaxSize
)) {
765 raiseStringLengthExceededError(len
+ 1);
768 assertx(len
+ 1 <= capacity());
769 memmove(s
+ 1, s
, len
);
774 case CharKind::NUMERIC
:
777 case CharKind::UPPER_CASE
:
780 case CharKind::LOWER_CASE
:
// Compute and store m_hash with hash_string_i_unsafe(); the result is
// asserted to be non-negative. If is_numeric_string() reports KindOfNull for
// the contents, STRHASH_MSB is OR-ed into m_hash. Other methods in this file
// treat `m_hash < 0` as "known not numeric".
// NOTE(review): the definition of `s` and the first part of the condition
// guarding the STRHASH_MSB update are not visible in this listing.
789 void StringData::preCompute() {
791 m_hash
= hash_string_i_unsafe(s
.data(), s
.size());
792 assertx(m_hash
>= 0);
794 (is_numeric_string(s
.data(), s
.size(), nullptr, nullptr,
795 1, nullptr) == KindOfNull
)) {
796 m_hash
|= STRHASH_MSB
;
// C++ implementation of hashHelper(), compiled only when the preprocessor
// condition below holds (the original comment notes an assembly version is
// used otherwise). It hashes data()/m_len with hash_string_i_unsafe().
// NOTE(review): what is done with `h` afterwards is not visible in this
// listing.
800 #if (!defined(__SSE4_2__) && !defined(ENABLE_AARCH64_CRC)) || \
801 defined(NO_HWCRC) || !defined(NO_M_DATA) || defined(_MSC_VER)
802 // This function is implemented directly in ASM in string-data-*.S otherwise.
803 NEVER_INLINE strhash_t
StringData::hashHelper() const {
805 strhash_t h
= hash_string_i_unsafe(data(), m_len
);
812 ///////////////////////////////////////////////////////////////////////////////
// Classify this string as numeric, writing the parsed value to `lval` or
// `dval`. Returns KindOfNull immediately when m_hash is negative (the
// "known not numeric" marker). Otherwise the result comes from
// is_numeric_string(); when that yields KindOfNull and `allow_errors` is
// non-zero, STRHASH_MSB is OR-ed into m_hash.
// NOTE(review): the arguments passed to is_numeric_string() and any extra
// guard around the m_hash update are not visible in this listing.
815 DataType
StringData::isNumericWithVal(int64_t &lval
, double &dval
,
816 int allow_errors
, int* overflow
) const {
817 if (m_hash
< 0) return KindOfNull
;
818 DataType ret
= KindOfNull
;
821 ret
= is_numeric_string(
829 if (ret
== KindOfNull
&& allow_errors
) {
830 // a proxy string has its hash precomputed - so it can't
831 // suddenly go from being numeric to not-numeric
833 m_hash
|= STRHASH_MSB
;
// Report whether this string is numeric. Returns false right away when m_hash
// is negative; otherwise calls isNumericWithVal() with allow_errors == 0 and
// switches on the resulting DataType.
// NOTE(review): most of the switch (including which kinds yield true) is not
// visible in this listing.
839 bool StringData::isNumeric() const {
840 if (m_hash
< 0) return false;
841 int64_t lval
; double dval
;
842 DataType ret
= isNumericWithVal(lval
, dval
, 0);
851 case KindOfPersistentString
:
853 case KindOfPersistentVec
:
855 case KindOfPersistentDict
:
857 case KindOfPersistentKeyset
:
864 case KindOfLazyClass
:
// Report whether this string is an integer. Returns false right away when
// m_hash is negative; otherwise calls isNumericWithVal() with
// allow_errors == 0 and switches on the resulting DataType.
// NOTE(review): most of the switch (including which kinds yield true) is not
// visible in this listing.
873 bool StringData::isInteger() const {
874 if (m_hash
< 0) return false;
875 int64_t lval
; double dval
;
876 DataType ret
= isNumericWithVal(lval
, dval
, 0);
885 case KindOfPersistentString
:
887 case KindOfPersistentVec
:
889 case KindOfPersistentDict
:
891 case KindOfPersistentKeyset
:
898 case KindOfLazyClass
:
// Boolean conversion: true unless the string is empty() or isZero().
907 bool StringData::toBoolean() const {
908 return !empty() && !isZero();
// Integer conversion: parses data() with strtoll() in the given `base`
// (the inline comment notes the default is 10).
911 int64_t StringData::toInt64(int base
/* = 10 */) const {
912 return strtoll(data(), nullptr, base
);
// Floating-point conversion: for a non-empty string the value comes from
// zend_strtod().
// NOTE(review): the definition of `s` and the value returned for an empty
// string are not visible in this listing.
915 double StringData::toDouble() const {
917 if (s
.size()) return zend_strtod(s
.data(), nullptr);
// Convert to a number if possible. Returns KindOfString immediately when
// m_hash is negative. Otherwise calls isNumericWithVal() with
// allow_errors == 0; if that yields KindOfInt64 or KindOfDouble that kind is
// returned, with the value in `lval` or `dval` respectively.
// NOTE(review): the fall-through return is not visible in this listing.
921 DataType
StringData::toNumeric(int64_t &lval
, double &dval
) const {
922 if (m_hash
< 0) return KindOfString
;
923 DataType ret
= isNumericWithVal(lval
, dval
, 0);
924 if (ret
== KindOfInt64
|| ret
== KindOfDouble
) return ret
;
928 ///////////////////////////////////////////////////////////////////////////////
// Loose equality with `s`. Identical pointers compare equal right away. When
// neither string has a negative m_hash (the "known not numeric" marker), a
// numeric comparison is attempted with numericCompare(s, true).
// NOTE(review): the handling of the numericCompare() result and the fallback
// comparison are not visible in this listing.
931 bool StringData::equal(const StringData
*s
) const {
933 if (s
== this) return true;
936 if (!(m_hash
< 0 || s
->m_hash
< 0)) {
937 ret
= numericCompare(s
, true);
// Compare this string with `v2` numerically.
//  - Both strings are parsed with isNumericWithVal(); the first `if` is taken
//    when either is non-numeric (KindOfNull) or is a non-finite double.
//  - Returns -2 when both overflowed in the same direction with equal double
//    values (see the original comment below).
//  - Int/int and double/double pairs are compared directly: 1 when this is
//    greater, 0 when equal.
//  - For a mixed pair the integer side is converted to double first. When the
//    converted values are equal and `eq` is set, handleConvNoticeForEq() is
//    called with "int", "float".
// NOTE(review): the value returned for non-numeric input and the "less than"
// returns are not visible in this listing.
945 int StringData::numericCompare(const StringData
*v2
, bool eq
) const {
949 int64_t lval1
, lval2
;
952 if ((ret1
= isNumericWithVal(lval1
, dval1
, 0, &oflow1
)) == KindOfNull
||
953 (ret1
== KindOfDouble
&& !std::isfinite(dval1
)) ||
954 (ret2
= v2
->isNumericWithVal(lval2
, dval2
, 0, &oflow2
)) == KindOfNull
||
955 (ret2
== KindOfDouble
&& !std::isfinite(dval2
))) {
958 if (oflow1
&& oflow1
== oflow2
&& dval1
== dval2
) {
959 return -2; // overflow in same direction, comparison will be inaccurate
961 if (ret1
== KindOfInt64
&& ret2
== KindOfInt64
) {
962 if (lval1
> lval2
) return 1;
963 if (lval1
== lval2
) return 0;
966 if (ret1
== KindOfDouble
&& ret2
== KindOfDouble
) {
967 if (dval1
> dval2
) return 1;
968 if (dval1
== dval2
) return 0;
972 if (ret1
== KindOfDouble
) {
973 assertx(ret2
== KindOfInt64
);
977 dval2
= (double)lval2
;
979 assertx(ret1
== KindOfInt64
);
980 assertx(ret2
== KindOfDouble
);
984 dval1
= (double)lval1
;
987 if (dval1
> dval2
) return 1;
988 if (dval1
== dval2
) {
989 if (eq
) handleConvNoticeForEq("int", "float");
// Three-way comparison with `v2`. Identical pointers compare as 0. A numeric
// comparison is attempted first via numericCompare(v2, false). The byte-wise
// path compares the common prefix with memcmp(); a non-zero result is
// returned as is, equal lengths give 0, and otherwise the longer string is
// greater (1 when this is longer, -1 when shorter).
// NOTE(review): the test on numericCompare()'s result and the definition of
// `len1` are not visible in this listing.
995 int StringData::compare(const StringData
*v2
) const {
998 if (v2
== this) return 0;
1000 int ret
= numericCompare(v2
, false);
1003 int len2
= v2
->size();
1004 int len
= len1
< len2
? len1
: len2
;
1005 ret
= memcmp(data(), v2
->data(), len
);
1006 if (ret
) return ret
;
1007 if (len1
== len2
) return 0;
1008 return len
< len1
? 1 : -1;
// Return the substring beginning at `start` with at most `length` characters.
//  - A negative `start`, a `start` at or past size(), or a non-positive
//    `length` returns the static empty string.
//  - `length` is compared against the characters remaining after `start`.
//  - A one-character result is returned as a static string; otherwise a new
//    string is built by copying from data() + start.
// NOTE(review): the return type line, the clamping of `length` to `max_len`,
// and the whole-string (length == size()) result are not visible in this
// listing.
1014 StringData::substr(int start
, int length
/* = StringData::MaxSize */) {
1015 if (start
< 0 || start
>= size() || length
<= 0) {
1016 return staticEmptyString();
1019 auto const max_len
= size() - start
;
1020 if (length
> max_len
) {
1024 assertx(length
> 0);
1025 if (UNLIKELY(length
== size())) {
1029 if (UNLIKELY(length
== 1)) {
1030 return makeStaticString(data()[start
]);
1032 return StringData::Make(data() + start
, length
, CopyString
);
1035 ///////////////////////////////////////////////////////////////////////////////
// Return the contents as a std::string built from a pointer and a length, so
// the length does not depend on finding a NUL terminator.
// NOTE(review): the definition of `s` is not visible in this listing.
1038 std::string
StringData::toCppString() const {
1040 return std::string(s
.data(), s
.size());
// Sanity checker for this object.
// Compile-time checks: sizeof(StringData) is 16 with use_lowptr and 24
// otherwise; MaxSize fits in an int; the SD_DATA / SD_LEN / SD_HASH constants
// match the real layout.
// Run-time assertions: valid header kind; size within [0, MaxSize]; a symbol
// must be static; and for non-immutable strings, size <= capacity <= MaxSize.
// NOTE(review): the return statement is not visible in this listing.
1043 bool StringData::checkSane() const {
1044 static_assert(sizeof(StringData
) == (use_lowptr
? 16 : 24),
1045 "StringData size changed---update assertion if you mean it");
1046 static_assert(size_t(MaxSize
) <= size_t(INT_MAX
), "Beware int wraparound");
1048 static_assert(sizeof(StringData
) == SD_DATA
, "");
1049 static_assert(offsetof(StringData
, m_len
) == SD_LEN
, "");
1050 static_assert(offsetof(StringData
, m_hash
) == SD_HASH
, "");
1052 assertx(kindIsValid());
1053 assertx(uint32_t(size()) <= MaxSize
);
1054 assertx(size() >= 0);
1055 assertx(IMPLIES(isSymbol(), isStatic()));
1056 if (!isImmutable()) {
1057 assertx(size() <= capacity());
1058 assertx(capacity() <= MaxSize
);
1060 // isFlat() and isProxy() both check whether m_data == payload(),
1061 // which guarantees by definition that isFlat() != isProxy()
1065 //////////////////////////////////////////////////////////////////////