1 #include "hphp/runtime/ext/collections/hash-collection.h"
2 #include "hphp/runtime/ext/collections/ext_collections-map.h"
3 #include "hphp/runtime/ext/collections/ext_collections-set.h"
4 #include "hphp/runtime/base/array-init.h"
5 #include "hphp/runtime/base/mixed-array.h"
6 #include "hphp/runtime/base/sort-helpers.h"
7 #include "hphp/runtime/vm/vm-regs.h"
8 #include "hphp/util/text-util.h"
11 /////////////////////////////////////////////////////////////////////////////
14 HashCollection::HashCollection(Class
* cls
, HeaderKind kind
, uint32_t cap
)
15 : ObjectData(cls
, NoInit
{}, collections::objectFlags
, kind
)
17 , m_arr(cap
== 0 ? staticEmptyDictArrayAsMixed() :
18 MixedArray::asMixed(MixedArray::MakeReserveDict(cap
)))
22 void HashCollection::throwTooLarge() {
23 assertx(getClassName().size() == 6);
24 auto clsName
= getClassName().get()->slice();
25 String
msg(130, ReserveString
);
26 auto buf
= msg
.bufferSlice();
27 auto sz
= snprintf(buf
.data(), buf
.size() + 1,
28 "%s object has reached its maximum capacity of %u element "
29 "slots and does not have room to add a new element",
30 clsName
.data() + 3, // strip "HH\" prefix
33 msg
.setSize(std::min
<int>(sz
, buf
.size()));
34 SystemLib::throwInvalidOperationExceptionObject(msg
);
38 void HashCollection::throwReserveTooLarge() {
39 assertx(getClassName().size() == 6);
40 auto clsName
= getClassName().get()->slice();
41 String
msg(80, ReserveString
);
42 auto buf
= msg
.bufferSlice();
43 auto sz
= snprintf(buf
.data(), buf
.size() + 1,
44 "%s does not support reserving room for more than %u elements",
45 clsName
.data() + 3, // strip "HH\" prefix
48 msg
.setSize(std::min
<int>(sz
, buf
.size()));
49 SystemLib::throwInvalidOperationExceptionObject(msg
);
53 int32_t* HashCollection::warnUnbalanced(size_t n
, int32_t* ei
) const {
54 if (n
> size_t(RuntimeOption::MaxArrayChain
)) {
55 raise_error("%s is too unbalanced (%lu)",
56 getClassName().data() + 3, // strip "HH\" prefix
63 void HashCollection::warnOnStrIntDup() const {
64 req::hash_set
<int64_t> seenVals
;
66 auto* eLimit
= elmLimit();
67 for (auto* e
= firstElm(); e
!= eLimit
; e
= nextElm(e
, eLimit
)) {
73 assertx(e
->hasStrKey());
74 // isStriclyInteger() puts the int value in newVal as a side effect.
75 if (!e
->skey
->isStrictlyInteger(newVal
)) continue;
78 if (seenVals
.find(newVal
) != seenVals
.end()) {
79 auto cls
= getVMClass()->name()->toCppString();
80 auto pos
= cls
.rfind('\\');
81 if (pos
!= std::string::npos
) {
82 cls
= cls
.substr(pos
+ 1);
85 "%s::toArray() for a %s containing both int(%" PRId64
") "
86 "and string('%" PRId64
"')",
96 seenVals
.insert(newVal
);
98 // Do nothing if no 'duplicates' were found.
101 Array
HashCollection::toArray() {
103 return empty_array();
105 auto ad
= arrayData()->toPHPArray(true);
106 if (UNLIKELY(ad
->size() < m_size
)) warnOnStrIntDup();
108 assertx(ad
->m_pos
== 0);
109 return Array::attach(ad
);
112 Array
HashCollection::toKeysArray() {
113 PackedArrayInit
ai(m_size
);
114 auto* eLimit
= elmLimit();
115 for (auto* e
= firstElm(); e
!= eLimit
; e
= nextElm(e
, eLimit
)) {
116 if (e
->hasIntKey()) {
117 ai
.append(int64_t{e
->ikey
});
119 assertx(e
->hasStrKey());
120 ai
.append(VarNR(e
->skey
).tv());
126 Array
HashCollection::toValuesArray() {
127 PackedArrayInit
ai(m_size
);
128 auto* eLimit
= elmLimit();
129 for (auto* e
= firstElm(); e
!= eLimit
; e
= nextElm(e
, eLimit
)) {
130 ai
.append(tvAsCVarRef(&e
->data
));
135 void HashCollection::remove(int64_t key
) {
137 auto p
= findForRemove(key
, hash_int64(key
));
143 void HashCollection::remove(StringData
* key
) {
145 auto p
= findForRemove(key
, key
->hash());
151 void HashCollection::eraseNoCompact(ssize_t pos
) {
152 assertx(canMutateBuffer());
153 assertx(validPos(pos
) && !isTombstone(pos
));
155 arrayData()->eraseNoCompact(pos
);
160 void HashCollection::makeRoom() {
162 assertx(posLimit() == cap());
163 if (LIKELY(!isDensityTooLow())) {
164 if (UNLIKELY(cap() == MaxSize
)) {
167 assertx(scale() > 0);
172 assertx(canMutateBuffer());
173 assertx(m_immCopy
.isNull());
178 void HashCollection::reserve(int64_t sz
) {
179 assertx(m_size
<= posLimit() && posLimit() <= cap());
180 auto cap
= static_cast<int64_t>(this->cap());
181 if (LIKELY(sz
> cap
)) {
182 if (UNLIKELY(sz
> int64_t(MaxReserveSize
))) {
183 throwReserveTooLarge();
185 // Fast path: The requested capacity is greater than the current capacity.
186 // Grow to the smallest allowed capacity that is sufficient.
187 grow(MixedArray::computeScaleFromSize(sz
));
188 assertx(canMutateBuffer());
191 if (LIKELY(!hasTombstones())) {
192 // Fast path: There are no tombstones and the requested capacity is less
193 // than or equal to the current capacity.
197 if (sz
+ int64_t(posLimit() - m_size
) <= cap
|| isDensityTooLow()) {
198 // If we reach this case, then either (1) density is too low (this is
199 // possible because of methods like retain()), in which case we compact
200 // to make room and return, OR (2) density is not too low and either
201 // sz < m_size or there's enough room to add sz-m_size elements, in
202 // which case we do nothing and return.
203 compactOrShrinkIfDensityTooLow();
204 assertx(sz
+ int64_t(posLimit() - m_size
) <= cap
);
208 // If we reach this case, then density is not too low and sz > m_size and
209 // there is not enough room to add sz-m_size elements. While would could
210 // compact to make room, it's better for Hysteresis if we grow capacity
212 assertx(!isDensityTooLow());
213 assertx(sz
+ int64_t(posLimit() - m_size
) > cap
);
214 assertx(cap
< MaxSize
&& tableMask() != 0);
215 auto newScale
= scale() * 2;
216 assertx(sz
> 0 && MixedArray::Capacity(newScale
) >= sz
);
218 assertx(canMutateBuffer());
222 void HashCollection::resizeHelper(uint32_t newCap
) {
223 assertx(newCap
>= m_size
);
224 assertx(m_immCopy
.isNull());
225 // Allocate a new ArrayData with the specified capacity and dup
226 // all the elements (without copying over tombstones).
227 auto ad
= arrayData() == staticEmptyDictArrayAsMixed() ?
228 MixedArray::asMixed(MixedArray::MakeReserveDict(newCap
)) :
229 MixedArray::CopyReserve(m_arr
, newCap
);
232 assertx(canMutateBuffer());
235 void HashCollection::grow(uint32_t newScale
) {
236 auto newCap
= MixedArray::Capacity(newScale
);
237 assertx(m_size
<= posLimit() && posLimit() <= cap() && cap() <= newCap
);
238 assertx(SmallSize
<= newCap
&& newCap
<= MaxSize
);
239 assertx(m_size
<= newCap
);
240 auto oldAd
= arrayData();
242 if (m_size
> 0 && !oldAd
->cowCheck()) {
243 m_arr
= MixedArray::Grow(oldAd
, newScale
, false);
246 // For cases where m_size is zero or the buffer's refcount is
247 // greater than 1, call resizeHelper().
248 resizeHelper(newCap
);
250 assertx(canMutateBuffer());
251 assertx(m_immCopy
.isNull());
254 void HashCollection::compact() {
255 assertx(isDensityTooLow());
257 if (!arrayData()->cowCheck()) {
258 // MixedArray::compact can only handle cases where the buffer's
260 arrayData()->compact(false);
262 // For cases where the buffer's refcount is greater than 1, call
266 assertx(canMutateBuffer());
267 assertx(m_immCopy
.isNull());
268 assertx(!isDensityTooLow());
271 void HashCollection::shrink(uint32_t oldCap
/* = 0 */) {
272 assertx(isCapacityTooHigh() && (oldCap
== 0 || oldCap
< cap()));
273 assertx(m_size
<= posLimit() && posLimit() <= cap());
277 // If an old capacity was specified, use that
279 // .. unless the old capacity is too small, in which case we use the
280 // smallest capacity that is large enough to hold the current number
282 for (; newCap
< m_size
; newCap
<<= 1) {}
283 assertx(newCap
== computeMaxElms(folly::nextPowTwo
<uint64_t>(newCap
) - 1));
285 if (m_size
== 0 && nextKI() == 0) {
287 m_arr
= staticEmptyDictArrayAsMixed();
290 // If no old capacity was provided, we compute the largest capacity
291 // where m_size/cap() is less than or equal to 0.5 for good hysteresis
292 size_t doubleSz
= size_t(m_size
) * 2;
293 uint32_t capThreshold
= (doubleSz
< size_t(MaxSize
)) ? doubleSz
: MaxSize
;
294 for (newCap
= SmallSize
* 2; newCap
< capThreshold
; newCap
<<= 1) {}
296 assertx(SmallSize
<= newCap
&& newCap
<= MaxSize
);
297 assertx(m_size
<= newCap
);
298 auto* oldAd
= arrayData();
299 if (!oldAd
->cowCheck()) {
300 // If the buffer's refcount is 1, we can teleport the elements
302 auto oldBuf
= data();
303 auto oldUsed
= posLimit();
304 auto oldNextKI
= nextKI();
305 m_arr
= MixedArray::asMixed(MixedArray::MakeReserveDict(newCap
));
306 m_arr
->m_size
= m_size
;
307 auto data
= this->data();
308 auto table
= hashTab();
309 auto table_mask
= tableMask();
311 setNextKI(oldNextKI
);
312 for (uint32_t frPos
= 0, toPos
= 0; toPos
< m_size
; ++toPos
, ++frPos
) {
313 frPos
= skipTombstonesNoBoundsCheck(frPos
, oldUsed
, oldBuf
);
314 copyElm(oldBuf
[frPos
], data
[toPos
]);
315 *findForNewInsert(table
, table_mask
, data
[toPos
].probe()) = toPos
;
320 // For cases where the buffer's refcount is greater than 1, call
322 resizeHelper(newCap
);
324 assertx(canMutateBuffer());
325 assertx(m_immCopy
.isNull());
326 assertx(!isCapacityTooHigh() || newCap
== oldCap
);
329 HashCollection::Elm
& HashCollection::allocElmFront(MixedArray::Inserter ei
) {
330 assertx(MixedArray::isValidIns(ei
) && !MixedArray::isValidPos(*ei
));
331 assertx(m_size
<= posLimit() && posLimit() < cap());
332 // Move the existing elements to make element slot 0 available.
333 memmove(data() + 1, data(), posLimit() * sizeof(Elm
));
335 // Update the hashtable to reflect the fact that everything was
336 // moved over one position
337 auto* hash
= hashTab();
338 auto* hashEnd
= hash
+ arrayData()->hashSize();
339 for (; hash
!= hashEnd
; ++hash
) {
340 if (validPos(*hash
)) {
344 // Set the hash entry we found to point to element slot 0.
346 // Adjust m_pos so that is points at this new first element.
347 arrayData()->m_pos
= 0;
348 // Adjust size to reflect that we're adding a new element.
350 // Store the value into element slot 0.
// -----------------------------------------------------------------------
// NOTE(review): the block below was damaged during extraction — several
// original lines (377-399, 410, 412-415, among others) are missing and
// the surviving lines are split mid-token. The code is left byte-for-byte
// as found; only comments were added. Judging from the visible tokens,
// this is the preSort<AccessorT>() pass that optionally observes element
// types to pick a sort flavor and compacts tombstones out of the element
// buffer. Restore the missing lines from upstream before compiling.
// -----------------------------------------------------------------------
355  * preSort() does an initial pass to do some preparatory work before the
356  * sort algorithm runs. For sorts that use builtin comparators, the types
357  * of values are also observed during this first pass. By observing the
358  * types during this initial pass, we can often use a specialized
359  * comparator and avoid performing type checks during the actual sort.
361 template <typename AccessorT
>
362 SortFlavor
HashCollection::preSort(const AccessorT
& acc
, bool checkTypes
) {
364 if (!checkTypes
&& !hasTombstones()) {
365 // No need to loop over the elements, we're done
368 auto* start
= data();
369 auto* end
= data() + posLimit();
370 bool allInts UNUSED
= true;
371 bool allStrs UNUSED
= true;
// Type-observing scan: record whether every element's key/value is an
// int and/or a string, so a specialized comparator can be selected.
374 while (!isTombstone(start
)) {
375 allInts
= (allInts
&& acc
.isInt(*start
));
376 allStrs
= (allStrs
&& acc
.isStr(*start
));
// Non-type-checking variant of the forward scan over live elements.
383 while (!isTombstone(start
)) {
// Scan backwards from the end, skipping trailing tombstones.
394 while (isTombstone(end
)) {
// Fill the tombstone slot at 'start' with the live element from 'end'.
400 copyElm(*end
, *start
);
// All live elements are now packed below 'start'; shrink the used region.
403 setPosLimit(start
- data());
404 // The logic above possibly moved elements and tombstones around
405 // within the buffer, so we make sure m_pos is not pointing at
406 // garbage by resetting it. The logic above ensures that the first
407 // slot is not a tombstone, so it's safe to set m_pos to 0.
408 arrayData()->m_pos
= 0;
409 assertx(!hasTombstones());
// Pick the specialized sort flavor when every element matched one type.
411 return allStrs
? StringSort
: allInts
? IntegerSort
: GenericSort
;
418 * postSort() runs after the sort has been performed. For c_Map,
419 * postSort() handles rebuilding the hash.
421 void HashCollection::postSort() { // Must provide the nothrow guarantee
422 arrayData()->postSort(false);
/*
 * Helper macros for the builtin-comparator sort entry points
 * (asort/ksort). SORT_CASE dispatches one sort_flags value to the
 * matching comparator specialization; SORT_CASE_BLOCK switches over all
 * supported flags; CALL_SORT picks the element flavor chosen by
 * preSort(); SORT_BODY wraps the whole thing and guarantees postSort()
 * runs even if a comparator throws.
 *
 * NOTE(review): the case/try skeleton was lost in extraction and has
 * been reconstructed around the visible lines — confirm upstream.
 */
#define SORT_CASE(flag, cmp_type, acc_type)                     \
  case flag: {                                                  \
    if (ascending) {                                            \
      cmp_type##Compare<acc_type, flag, true> comp;             \
      HPHP::Sort::sort(data(), data() + m_size, comp);          \
    } else {                                                    \
      cmp_type##Compare<acc_type, flag, false> comp;            \
      HPHP::Sort::sort(data(), data() + m_size, comp);          \
    }                                                           \
    break;                                                      \
  }

#define SORT_CASE_BLOCK(cmp_type, acc_type)                     \
  switch (sort_flags) {                                         \
    default: /* fall through to SORT_REGULAR case */            \
    SORT_CASE(SORT_REGULAR, cmp_type, acc_type)                 \
    SORT_CASE(SORT_NUMERIC, cmp_type, acc_type)                 \
    SORT_CASE(SORT_STRING, cmp_type, acc_type)                  \
    SORT_CASE(SORT_LOCALE_STRING, cmp_type, acc_type)           \
    SORT_CASE(SORT_NATURAL, cmp_type, acc_type)                 \
    SORT_CASE(SORT_NATURAL_CASE, cmp_type, acc_type)            \
  }

#define CALL_SORT(acc_type)                                     \
  if (flav == StringSort) {                                     \
    SORT_CASE_BLOCK(StrElm, acc_type)                           \
  } else if (flav == IntegerSort) {                             \
    SORT_CASE_BLOCK(IntElm, acc_type)                           \
  } else {                                                      \
    SORT_CASE_BLOCK(Elm, acc_type)                              \
  }

#define SORT_BODY(acc_type)                                     \
  do {                                                          \
    SortFlavor flav = preSort<acc_type>(acc_type(), true);      \
    try {                                                       \
      CALL_SORT(acc_type);                                      \
    } catch (...) {                                             \
      /* make sure the map is left in a consistent state */     \
      postSort();                                               \
      throw;                                                    \
    }                                                           \
    postSort();                                                 \
  } while (0)
467 void HashCollection::asort(int sort_flags
, bool ascending
) {
468 if (m_size
<= 1) return;
470 SORT_BODY(AssocValAccessor
<HashCollection::Elm
>);
473 void HashCollection::ksort(int sort_flags
, bool ascending
) {
474 if (m_size
<= 1) return;
476 SORT_BODY(AssocKeyAccessor
<HashCollection::Elm
>);
480 #undef SORT_CASE_BLOCK
/*
 * Helper macro for the user-comparator sort entry points
 * (uasort/uksort): decodes the user callback, returns false if it is
 * not callable, strips tombstones via preSort(), sorts with
 * ElmUCompare, and guarantees postSort() runs via SCOPE_EXIT even if
 * the comparator throws.
 *
 * NOTE(review): the ctx/SCOPE_EXIT skeleton was lost in extraction and
 * has been reconstructed around the visible lines — confirm upstream.
 */
#define USER_SORT_BODY(acc_type)                                \
  do {                                                          \
    CallCtx ctx;                                                \
    CallerFrame cf;                                             \
    vm_decode_function(cmp_function, cf(), false, ctx);         \
    if (!ctx.func) {                                            \
      return false;                                             \
    }                                                           \
    preSort<acc_type>(acc_type(), false);                       \
    SCOPE_EXIT {                                                \
      /* make sure the map is left in a consistent state */     \
      postSort();                                               \
    };                                                          \
    ElmUCompare<acc_type> comp;                                 \
    comp.ctx = &ctx;                                            \
    HPHP::Sort::sort(data(), data() + m_size, comp);            \
    return true;                                                \
  } while (0)
503 bool HashCollection::uasort(const Variant
& cmp_function
) {
504 if (m_size
<= 1) return true;
506 USER_SORT_BODY(AssocValAccessor
<HashCollection::Elm
>);
509 bool HashCollection::uksort(const Variant
& cmp_function
) {
510 if (m_size
<= 1) return true;
512 USER_SORT_BODY(AssocKeyAccessor
<HashCollection::Elm
>);
515 #undef USER_SORT_BODY
517 void HashCollection::mutateImpl() {
518 assertx(arrayData()->hasMultipleRefs());
520 if (canMutateBuffer()) {
523 auto* oldAd
= arrayData();
524 m_arr
= MixedArray::asMixed(MixedArray::Copy(oldAd
));
525 assertx(oldAd
->hasMultipleRefs());
526 oldAd
->decRefCount();
529 /////////////////////////////////////////////////////////////////////////////