1 #ifndef incl_HPHP_COLLECTIONS_HASHCOLLECTION_H
2 #define incl_HPHP_COLLECTIONS_HASHCOLLECTION_H
4 #include "hphp/runtime/ext/extension.h"
5 #include "hphp/runtime/ext/collections/ext_collections.h"
6 #include "hphp/runtime/base/mixed-array.h"
7 #include "hphp/runtime/base/mixed-array-defs.h"
8 #include "hphp/runtime/base/tv-refcount.h"
11 /////////////////////////////////////////////////////////////////////////////
13 ALWAYS_INLINE MixedArray
* CreateDictAsMixed() {
14 return MixedArray::asMixed(ArrayData::CreateDict());
17 // Common base class for BaseMap/BaseSet collections
18 struct HashCollection
: ObjectData
{
19 explicit HashCollection(Class
* cls
, HeaderKind kind
)
20 : ObjectData(cls
, NoInit
{}, ObjectData::NoAttrs
, kind
)
22 , m_arr(CreateDictAsMixed())
24 explicit HashCollection(Class
* cls
, HeaderKind kind
, ArrayData
* arr
)
25 : ObjectData(cls
, NoInit
{}, ObjectData::NoAttrs
, kind
)
26 , m_unusedAndSize(arr
->m_size
)
27 , m_arr(MixedArray::asMixed(arr
))
// Constructs an empty collection with room reserved for `cap` elements
// (defined out of line).
explicit HashCollection(Class* cls, HeaderKind kind, uint32_t cap);
// Element and hash types are borrowed from MixedArray, which provides
// the underlying storage for all hash collections.
using Elm = MixedArray::Elm;
using hash_t = MixedArray::hash_t;

// Sentinel positions and sizing parameters, mirrored from MixedArray so
// the collection and its backing array always agree on them.
static const int32_t Empty = MixedArray::Empty;
static const int32_t Tombstone = MixedArray::Tombstone;
static const uint32_t LoadScale = MixedArray::LoadScale;
static const uint32_t SmallScale = MixedArray::SmallScale;
static const uint32_t SmallSize = MixedArray::SmallSize;
static const uint32_t MaxSize = MixedArray::MaxSize;
// HashCollections can only guarantee that it won't throw "cannot add
// element" exceptions if m_size <= MaxSize / 2. Therefore, we only allow
// reserve() to make room for up to MaxSize / 2 elements.
static const uint32_t MaxReserveSize = MaxSize / 2;
45 int64_t size() const {
// Generic conversion to a PHP array is intentionally disabled; callers
// must choose an explicit conversion (e.g. toValuesArray()).
Array toArray() = delete;
51 template <IntishCast intishCast
= IntishCast::None
>
52 Array
toPHPArrayImpl() {
58 if (intishCast
== IntishCast::None
) {
59 ad
= arrayData()->toPHPArray(true);
60 } else if (intishCast
== IntishCast::Cast
) {
61 ad
= arrayData()->toPHPArrayIntishCast(true);
66 if (UNLIKELY(ad
->size() < m_size
)) warnOnStrIntDup();
68 assertx(ad
->m_pos
== 0);
69 return Array::attach(ad
);
76 Array
toValuesArray();
78 bool contains(int64_t key
) const {
79 return find(key
, hash_int64(key
)) != Empty
;
81 bool contains(StringData
* key
) const {
82 return find(key
, key
->hash()) != Empty
;
85 void remove(int64_t key
);
86 void remove(StringData
* key
);
87 void eraseNoCompact(MixedArray::RemovePos pos
);
88 void erase(MixedArray::RemovePos pos
) {
90 compactOrShrinkIfDensityTooLow();
93 bool isFull() { return posLimit() == cap(); }
94 bool isDensityTooLow() const {
95 bool b
= (m_size
< posLimit() / 2);
97 arrayData()->isStatic() && arrayData()->empty(),
100 assertx(IMPLIES(cap() == 0, !b
));
104 bool isCapacityTooHigh() const {
105 // Return true if current capacity at least 8x greater than m_size AND
106 // if current capacity is at least 8x greater than the minimum capacity
107 bool b
= ((uint64_t(cap()) >= uint64_t(m_size
) * 8) &&
108 (cap() >= HashCollection::SmallSize
* 8));
110 arrayData()->isStatic() && arrayData()->empty(),
113 assertx(IMPLIES(cap() == 0, !b
));
// grow() will increase the capacity of this HashCollection; newScale must
// be greater than or equal to the current scale so that the new cap and mask
// satisfy all the usual cap/mask invariants.
void grow(uint32_t newScale);

// resizeHelper() dups all of the elements (not copying tombstones) to a
// new buffer of the specified capacity and decRefs the old buffer. This
// method can be used to decrease this HashCollection's capacity.
void resizeHelper(uint32_t newCap);

// This method will increase capacity or compact as needed to make
// room to add one new element; it asserts that it is only called
// when isFull() is true
// (NOTE(review): the declaration this comment describes appears to have
// been lost in this paste.)

// This method performs an in-place compaction; it asserts that it
// is only called when isDensityTooLow() is true
// (NOTE(review): declaration likewise missing here.)

// This method reduces this HashCollection's capacity; it asserts that it
// is only called when isCapacityTooHigh() is true.
void shrink(uint32_t cap = 0);
140 // In general this method should be called after one or more elements
141 // have been removed. If density is too low, it will shrink or compact
142 // this HashCollection as appropriate.
143 void compactOrShrinkIfDensityTooLow() {
144 if (UNLIKELY(isDensityTooLow())) {
145 if (isCapacityTooHigh()) {
153 // In general this method should be called after a speculative reserve
154 // and zero or more adds have been performed. If capacity is too high,
155 // it will shrink this HashCollection.
156 void shrinkIfCapacityTooHigh(uint32_t oldCap
) {
157 if (UNLIKELY(isCapacityTooHigh() && cap() > oldCap
)) {
162 HashCollection::Elm
& allocElm(MixedArray::Inserter ei
) {
163 assertx(canMutateBuffer());
164 assertx(MixedArray::isValidIns(ei
) && !MixedArray::isValidPos(*ei
)
165 && m_size
<= posLimit() && posLimit() < cap());
166 size_t i
= posLimit();
// Like allocElm(), but presumably places the new element at the front of
// iteration order -- confirm against the out-of-line definition.
HashCollection::Elm& allocElmFront(MixedArray::Inserter ei);

// This method will grow or compact as needed in preparation for
// repeatedly adding new elements until m_size >= sz.
void reserve(int64_t sz);
179 // The iter functions below facilitate iteration over HashCollections.
180 // Iterators cannot store Elm pointers (because it's possible for m_data
181 // to change without bumping m_version in some cases), so indices are
184 bool iter_valid(ssize_t pos
) const {
185 return pos
< (ssize_t
)posLimit();
188 bool iter_valid(ssize_t pos
, ssize_t limit
) const {
189 assertx(limit
== (ssize_t
)posLimit());
193 const Elm
* iter_elm(ssize_t pos
) const {
194 assertx(iter_valid(pos
));
195 return &(data()[pos
]);
198 ssize_t
iter_begin() const {
199 ssize_t limit
= posLimit();
201 for (; pos
!= limit
; ++pos
) {
202 auto* e
= iter_elm(pos
);
203 if (!isTombstone(e
)) break;
208 ssize_t
iter_next(ssize_t pos
) const {
209 ssize_t limit
= posLimit();
210 for (++pos
; pos
< limit
; ++pos
) {
211 auto* e
= iter_elm(pos
);
212 if (!isTombstone(e
)) return pos
;
217 ssize_t
iter_prev(ssize_t pos
) const {
218 ssize_t orig_pos
= pos
;
221 auto* e
= iter_elm(pos
);
222 if (!isTombstone(e
)) return pos
;
227 Variant
iter_key(ssize_t pos
) const {
228 assertx(iter_valid(pos
));
229 auto* e
= iter_elm(pos
);
230 if (e
->hasStrKey()) {
231 return Variant
{e
->skey
};
233 return (int64_t)e
->ikey
;
236 const TypedValue
* iter_value(ssize_t pos
) const {
237 assertx(iter_valid(pos
));
238 return &iter_elm(pos
)->data
;
241 uint32_t nthElmPos(size_t n
) const {
242 if (LIKELY(!hasTombstones())) {
243 // Fast path: HashCollection contains no tombstones
246 // Slow path: AssoCollection has at least one tombstone,
247 // so we need to count forward
248 // TODO Task# 4281431: If n > m_size/2 we could get better
249 // performance by starting at the end of the buffer and
256 while (isTombstone(pos
)) {
257 assertx(pos
+ 1 < posLimit());
262 assertx(pos
+ 1 < posLimit());
269 * mutate() must be called before doing anything that mutates
270 * this HashCollection's buffer, unless it can be proven that
271 * canMutateBuffer() is true. mutate() takes care of updating
272 * m_immCopy and making a copy of this HashCollection's buffer
276 assertx(IMPLIES(!m_immCopy
.isNull(), arrayData()->hasMultipleRefs()));
277 if (arrayData()->cowCheck()) {
278 // mutateImpl() does two things for us. First it drops the
279 // immutable collection held by m_immCopy (if m_immCopy is not
280 // null). Second, it takes care of copying the buffer if needed.
283 assertx(canMutateBuffer());
284 assertx(m_immCopy
.isNull());
288 assertx(m_immCopy
.isNull() ||
289 (arrayData() == ((HashCollection
*)m_immCopy
.get())->arrayData() &&
290 arrayData()->hasMultipleRefs()));
294 TypedValue
* findForUnserialize(int64_t k
) {
295 auto h
= hash_int64(k
);
296 auto p
= findForInsert(k
, h
);
297 if (UNLIKELY(MixedArray::isValidPos(*p
))) return nullptr;
298 auto e
= &allocElm(p
);
300 arrayData()->mutableKeyTypes()->recordInt();
305 TypedValue
* findForUnserialize(StringData
* key
) {
306 auto h
= key
->hash();
307 auto p
= findForInsert(key
, h
);
308 if (UNLIKELY(MixedArray::isValidPos(*p
))) return nullptr;
309 auto e
= &allocElm(p
);
310 e
->setStrKey(key
, h
);
311 arrayData()->mutableKeyTypes()->recordStr(key
);
316 * Batch insertion interface that postpones hashing, for cache efficiency.
317 * Use these methods in order:
318 * 1. batchInsertBegin,
319 * 2. any number of batchInsert calls,
320 * 3. tryBatchInsertEnd or batchInsertAbort
321 * No other methods may be called between 1 and 3.
323 uint32_t batchInsertBegin(int64_t n
) {
327 TypedValue
* batchInsert(StringData
* key
) {
328 auto h
= key
->hash();
329 // Not hashing yet, so position is unused for now.
330 int32_t unusedPos
= -1;
331 auto e
= &allocElm(MixedArray::Inserter(&unusedPos
));
332 e
->setStrKey(key
, h
);
333 arrayData()->mutableKeyTypes()->recordStr(key
);
337 * Attempts to finalize a batch insertion, given the return value from the
338 * call to batchInsertBegin. Returns true on success, and aborts and returns
339 * false on failure (e.g., duplicate keys).
341 bool tryBatchInsertEnd(uint32_t begin
) {
342 for (auto i
= begin
; i
< posLimit(); ++i
) {
345 auto p
= e
.hasStrKey() ?
346 findForInsert(e
.skey
, h
) :
347 findForInsert(e
.ikey
, h
);
348 if (UNLIKELY(MixedArray::isValidPos(*p
))) {
349 batchInsertAbort(begin
);
357 * Cancels a started batch insertion, given the return value from the call to
358 * batchInsertBegin, reverting to the state before it began. Idempotent.
360 void batchInsertAbort(uint32_t begin
) {
361 for (auto i
= posLimit(); i
> begin
; --i
) {
362 auto& e
= data()[i
- 1];
364 auto const old
= *tv
;
365 tv
->m_type
= kInvalidDataType
;
368 if (e
.hasStrKey()) decRefStr(e
.skey
);
373 static bool validPos(ssize_t pos
) {
377 static bool validPos(int32_t pos
) {
382 static std::array
<TypedValue
, 1>
383 makeArgsFromHashValue(const HashCollection::Elm
& e
) {
384 // note that this is a potentially unnecessary copy
385 // that might be reinterpret_cast ed away
386 // http://stackoverflow.com/questions/11205186/treat-c-cstyle-array-as-stdarray
387 return std::array
<TypedValue
, 1> {{ e
.data
}};
391 static std::array
<TypedValue
, 2>
392 makeArgsFromHashKeyAndValue(const HashCollection::Elm
& e
) {
393 return std::array
<TypedValue
, 2> {{
395 ? make_tv
<KindOfInt64
>(e
.ikey
)
396 : make_tv
<KindOfString
>(e
.skey
)),
402 * canMutateBuffer() indicates whether it is currently safe to directly
403 * modify this HashCollection's buffer.
405 bool canMutateBuffer() const {
406 assertx(IMPLIES(!arrayData()->cowCheck(), m_immCopy
.isNull()));
407 return !arrayData()->cowCheck();
410 static constexpr ptrdiff_t arrOffset() {
411 return offsetof(HashCollection
, m_arr
);
414 bool toBoolImpl() const {
415 return (m_size
!= 0);
418 ssize_t
find(int64_t ki
, inthash_t h
) const {
419 return m_arr
->find(ki
, h
);
422 ssize_t
find(const StringData
* s
, strhash_t h
) const {
423 return m_arr
->find(s
, h
);
426 auto findForRemove(int64_t k
, inthash_t h
) const {
427 return m_arr
->findForRemove(k
, h
);
430 auto findForRemove(const StringData
* s
, strhash_t h
) const {
431 return m_arr
->findForRemove(s
, h
);
434 MixedArray::Inserter
findForInsert(int64_t ki
, inthash_t h
) const {
435 return m_arr
->findForInsertUpdate(ki
, h
);
438 MixedArray::Inserter
findForInsert(const StringData
* s
, strhash_t h
) const {
439 return m_arr
->findForInsertUpdate(s
, h
);
442 MixedArray::Inserter
findForNewInsert(int32_t* table
, size_t mask
,
444 return m_arr
->findForNewInsert(table
, mask
, h0
);
447 static void copyElm(const Elm
& frE
, Elm
& toE
) {
448 memcpy(&toE
, &frE
, sizeof(Elm
));
451 static void dupElm(const Elm
& frE
, Elm
& toE
) {
452 assertx(!isTombstoneType(frE
.data
.m_type
));
453 memcpy(&toE
, &frE
, sizeof(Elm
));
454 if (toE
.hasStrKey()) toE
.skey
->incRefCount();
455 tvIncRefGen(toE
.data
);
// Accessors for the backing MixedArray.
MixedArray* arrayData() { return m_arr; }
const MixedArray* arrayData() const { return m_arr; }

// Copy the buffer and reset the immutable copy.
// (NOTE(review): this comment described a declaration -- likely
// mutateImpl() -- that appears to have been lost in this paste.)

// Direct access to the element slots and hash table of the backing array.
Elm* data() { return m_arr->data(); }
const Elm* data() const { return m_arr->data(); }
int32_t* hashTab() const { return m_arr->hashTab(); }
470 void setSize(uint32_t sz
) {
471 assertx(sz
<= cap());
472 if (m_arr
->isStatic() && m_arr
->empty()) {
476 assertx(!arrayData()->hasMultipleRefs());
478 arrayData()->m_size
= sz
;
481 assertx(m_size
+ 1 <= cap());
482 assertx(!arrayData()->hasMultipleRefs());
484 arrayData()->m_size
= m_size
;
488 assertx(!arrayData()->hasMultipleRefs());
490 arrayData()->m_size
= m_size
;
492 uint32_t scale() const {
493 return arrayData()->scale();
495 uint32_t cap() const {
496 return arrayData()->capacity();
498 uint32_t tableMask() const {
499 return arrayData()->mask();
501 uint32_t posLimit() const {
502 return arrayData()->m_used
;
505 assertx(!arrayData()->hasMultipleRefs());
506 assertx(posLimit() + 1 <= cap());
507 arrayData()->m_used
++;
509 void setPosLimit(uint32_t limit
) {
510 auto* a
= arrayData();
511 if (a
->isStatic() && a
->empty()) {
515 assertx(!a
->hasMultipleRefs());
516 assertx(limit
<= cap());
520 return arrayData()->m_nextKI
;
522 void setNextKI(int64_t ki
) {
523 assertx(!arrayData()->hasMultipleRefs());
524 arrayData()->m_nextKI
= ki
;
526 void updateNextKI(int64_t ki
) {
527 assertx(!arrayData()->hasMultipleRefs());
528 auto* a
= arrayData();
529 if (ki
>= a
->m_nextKI
&& a
->m_nextKI
>= 0) {
530 a
->m_nextKI
= static_cast<uint64_t>(ki
) + 1;
534 // The skipTombstonesNoBoundsCheck helper functions assume that either
535 // the specified location is not a tombstone OR that there is at least
536 // one non-tombstone after the specified position.
539 skipTombstonesNoBoundsCheck(int32_t pos
, int32_t posLimit
, const Elm
* data
) {
540 assertx(pos
< posLimit
);
541 while (isTombstone(pos
, data
)) {
543 assertx(pos
< posLimit
);
548 int32_t skipTombstonesNoBoundsCheck(int32_t pos
) {
549 return skipTombstonesNoBoundsCheck(pos
, posLimit(), data());
552 const Elm
* firstElmImpl() const {
553 const Elm
* e
= data();
554 const Elm
* eLimit
= elmLimit();
555 for (; e
!= eLimit
&& isTombstone(e
); ++e
) {}
559 return (Elm
*)firstElmImpl();
561 const Elm
* firstElm() const {
562 return firstElmImpl();
566 return data() + posLimit();
568 const Elm
* elmLimit() const {
569 return data() + posLimit();
572 static Elm
* nextElm(Elm
* e
, Elm
* eLimit
) {
573 assertx(e
!= eLimit
);
574 for (++e
; e
!= eLimit
&& isTombstone(e
); ++e
) {}
577 static const Elm
* nextElm(const Elm
* e
, const Elm
* eLimit
) {
578 return (const Elm
*)nextElm((Elm
*)e
, (Elm
*)eLimit
);
581 static bool isTombstoneType(DataType t
) {
582 return t
== kInvalidDataType
;
585 static bool isTombstone(const Elm
* e
) {
586 return isTombstoneType(e
->data
.m_type
);
589 static bool isTombstone(ssize_t pos
, const Elm
* data
) {
590 return isTombstoneType(data
[pos
].data
.m_type
);
593 bool isTombstone(ssize_t pos
) const {
594 assertx(size_t(pos
) <= posLimit());
595 return isTombstone(pos
, data());
598 bool hasTombstones() const { return m_size
!= posLimit(); }
600 static uint32_t computeMaxElms(uint32_t tableMask
) {
601 return tableMask
- tableMask
/ LoadScale
;
// Thrown when the collection would exceed its maximum size -- see
// MaxSize/MaxReserveSize above. (Contracts defined out of line.)
[[noreturn]] void throwTooLarge();
[[noreturn]] void throwReserveTooLarge();
// Presumably raises a warning about an unbalanced hash chain and returns
// an insert position -- confirm against the out-of-line definition.
int32_t* warnUnbalanced(size_t n, int32_t* ei) const;

// Raises a warning if the set contains an int and a string with the same
// numeric value: e.g. Set {'123', 123}. It's a no-op otherwise.
void warnOnStrIntDup() const;
614 void scan(type_scan::Scanner
& scanner
) const {
616 scanner
.scan(m_immCopy
);
620 SortTmp(HashCollection
* h
, SortFunction sf
) : m_h(h
) {
621 if (hasUserDefinedCmp(sf
)) {
622 m_ad
= MixedArray::Copy(m_h
->m_arr
);
629 if (m_h
->m_arr
!= m_ad
) {
630 Array tmp
= Array::attach(m_h
->m_arr
);
631 assertx(m_ad
->isDictKind());
632 m_h
->m_arr
= static_cast<MixedArray
*>(m_ad
);
635 ArrayData
* operator->() { return m_ad
; }
643 // Replace the m_arr field with a new MixedArray. The array must be known to
644 // *not* contain any references.
645 void replaceArray(ArrayData
* adata
) {
648 m_arr
= MixedArray::asMixed(adata
);
649 adata
->incRefCount();
650 m_size
= adata
->size();
659 int64_t m_unusedAndSize
;
662 MixedArray
* m_arr
; // Elm store.
664 // A pointer to an immutable collection that shares its buffer with
669 /////////////////////////////////////////////////////////////////////////////