Codemod asserts to assertxs in the runtime
[hiphop-php.git] hphp/runtime/ext/collections/hash-collection.cpp
blob 8197590b7cd4f964cdd1c7baea38d27b68a720b0
#include "hphp/runtime/ext/collections/hash-collection.h"
#include "hphp/runtime/ext/collections/ext_collections-map.h"
#include "hphp/runtime/ext/collections/ext_collections-set.h"
#include "hphp/runtime/base/array-init.h"
#include "hphp/runtime/base/mixed-array.h"
#include "hphp/runtime/base/sort-helpers.h"
#include "hphp/runtime/vm/vm-regs.h"
#include "hphp/util/text-util.h"

namespace HPHP {
/////////////////////////////////////////////////////////////////////////////
// HashCollection

HashCollection::HashCollection(Class* cls, HeaderKind kind, uint32_t cap)
  : ObjectData(cls, NoInit{}, collections::objectFlags, kind)
  , m_unusedAndSize(0)
  , m_arr(cap == 0 ? staticEmptyDictArrayAsMixed() :
          MixedArray::asMixed(MixedArray::MakeReserveDict(cap)))
{}

NEVER_INLINE
void HashCollection::throwTooLarge() {
  assertx(getClassName().size() == 6);
  auto clsName = getClassName().get()->slice();
  String msg(130, ReserveString);
  auto buf = msg.bufferSlice();
  auto sz = snprintf(buf.data(), buf.size() + 1,
    "%s object has reached its maximum capacity of %u element "
    "slots and does not have room to add a new element",
    clsName.data() + 3, // strip "HH\" prefix
    MaxSize
  );
  msg.setSize(std::min<int>(sz, buf.size()));
  SystemLib::throwInvalidOperationExceptionObject(msg);
}

NEVER_INLINE
void HashCollection::throwReserveTooLarge() {
  assertx(getClassName().size() == 6);
  auto clsName = getClassName().get()->slice();
  String msg(80, ReserveString);
  auto buf = msg.bufferSlice();
  auto sz = snprintf(buf.data(), buf.size() + 1,
    "%s does not support reserving room for more than %u elements",
    clsName.data() + 3, // strip "HH\" prefix
    MaxReserveSize
  );
  msg.setSize(std::min<int>(sz, buf.size()));
  SystemLib::throwInvalidOperationExceptionObject(msg);
}

NEVER_INLINE
int32_t* HashCollection::warnUnbalanced(size_t n, int32_t* ei) const {
  if (n > size_t(RuntimeOption::MaxArrayChain)) {
    raise_error("%s is too unbalanced (%lu)",
                getClassName().data() + 3, // strip "HH\" prefix
                n);
  }
  return ei;
}

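// warnOnStrIntDup() scans for an integer key and an integer-like string key
// that share the same numeric value. toPHPArray() folds such keys together,
// so toArray() uses this to raise a warning when the conversion came up short.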
NEVER_INLINE
void HashCollection::warnOnStrIntDup() const {
  req::hash_set<int64_t> seenVals;

  auto* eLimit = elmLimit();
  for (auto* e = firstElm(); e != eLimit; e = nextElm(e, eLimit)) {
    int64_t newVal = 0;

    if (e->hasIntKey()) {
      newVal = e->ikey;
    } else {
      assertx(e->hasStrKey());
      // isStrictlyInteger() puts the int value in newVal as a side effect.
      if (!e->skey->isStrictlyInteger(newVal)) continue;
    }

    if (seenVals.find(newVal) != seenVals.end()) {
      auto cls = getVMClass()->name()->toCppString();
      auto pos = cls.rfind('\\');
      if (pos != std::string::npos) {
        cls = cls.substr(pos + 1);
      }
      raise_warning(
        "%s::toArray() for a %s containing both int(%" PRId64 ") "
        "and string('%" PRId64 "')",
        cls.c_str(),
        toLower(cls).c_str(),
        newVal,
        newVal
      );

      return;
    }

    seenVals.insert(newVal);
  }
  // Do nothing if no 'duplicates' were found.
}

Array HashCollection::toArray() {
  if (!m_size) {
    return empty_array();
  }
  auto ad = arrayData()->toPHPArray(true);
  if (UNLIKELY(ad->size() < m_size)) warnOnStrIntDup();
  assertx(m_size);
  assertx(ad->m_pos == 0);
  return Array::attach(ad);
}

Array HashCollection::toKeysArray() {
  PackedArrayInit ai(m_size);
  auto* eLimit = elmLimit();
  for (auto* e = firstElm(); e != eLimit; e = nextElm(e, eLimit)) {
    if (e->hasIntKey()) {
      ai.append(int64_t{e->ikey});
    } else {
      assertx(e->hasStrKey());
      ai.append(VarNR(e->skey).tv());
    }
  }
  return ai.toArray();
}

Array HashCollection::toValuesArray() {
  PackedArrayInit ai(m_size);
  auto* eLimit = elmLimit();
  for (auto* e = firstElm(); e != eLimit; e = nextElm(e, eLimit)) {
    ai.append(tvAsCVarRef(&e->data));
  }
  return ai.toArray();
}

void HashCollection::remove(int64_t key) {
  mutate();
  auto p = findForRemove(key, hash_int64(key));
  if (validPos(p)) {
    erase(p);
  }
}

void HashCollection::remove(StringData* key) {
  mutate();
  auto p = findForRemove(key, key->hash());
  if (validPos(p)) {
    erase(p);
  }
}

void HashCollection::eraseNoCompact(ssize_t pos) {
  assertx(canMutateBuffer());
  assertx(validPos(pos) && !isTombstone(pos));
  assertx(m_size > 0);
  arrayData()->eraseNoCompact(pos);
  --m_size;
}

NEVER_INLINE
void HashCollection::makeRoom() {
  assertx(isFull());
  assertx(posLimit() == cap());
  if (LIKELY(!isDensityTooLow())) {
    if (UNLIKELY(cap() == MaxSize)) {
      throwTooLarge();
    }
    assertx(scale() > 0);
    grow(scale() * 2);
  } else {
    compact();
  }
  assertx(canMutateBuffer());
  assertx(m_immCopy.isNull());
  assertx(!isFull());
}

NEVER_INLINE
void HashCollection::reserve(int64_t sz) {
  assertx(m_size <= posLimit() && posLimit() <= cap());
  auto cap = static_cast<int64_t>(this->cap());
  if (LIKELY(sz > cap)) {
    if (UNLIKELY(sz > int64_t(MaxReserveSize))) {
      throwReserveTooLarge();
    }
    // Fast path: The requested capacity is greater than the current capacity.
    // Grow to the smallest allowed capacity that is sufficient.
    grow(MixedArray::computeScaleFromSize(sz));
    assertx(canMutateBuffer());
    return;
  }
  if (LIKELY(!hasTombstones())) {
    // Fast path: There are no tombstones and the requested capacity is less
    // than or equal to the current capacity.
    mutate();
    return;
  }
  if (sz + int64_t(posLimit() - m_size) <= cap || isDensityTooLow()) {
    // If we reach this case, then either (1) density is too low (this is
    // possible because of methods like retain()), in which case we compact
    // to make room and return, OR (2) density is not too low and either
    // sz < m_size or there's enough room to add sz-m_size elements, in
    // which case we do nothing and return.
    compactOrShrinkIfDensityTooLow();
    assertx(sz + int64_t(posLimit() - m_size) <= cap);
    mutate();
    return;
  }
  // If we reach this case, then density is not too low and sz > m_size and
  // there is not enough room to add sz-m_size elements. While we could
  // compact to make room, it's better for hysteresis if we grow capacity
  // by 2x instead.
  assertx(!isDensityTooLow());
  assertx(sz + int64_t(posLimit() - m_size) > cap);
  assertx(cap < MaxSize && tableMask() != 0);
  auto newScale = scale() * 2;
  assertx(sz > 0 && MixedArray::Capacity(newScale) >= sz);
  grow(newScale);
  assertx(canMutateBuffer());
}

ALWAYS_INLINE
void HashCollection::resizeHelper(uint32_t newCap) {
  assertx(newCap >= m_size);
  assertx(m_immCopy.isNull());
  // Allocate a new ArrayData with the specified capacity and dup
  // all the elements (without copying over tombstones).
  auto ad = arrayData() == staticEmptyDictArrayAsMixed() ?
    MixedArray::asMixed(MixedArray::MakeReserveDict(newCap)) :
    MixedArray::CopyReserve(m_arr, newCap);
  decRefArr(m_arr);
  m_arr = ad;
  assertx(canMutateBuffer());
}

void HashCollection::grow(uint32_t newScale) {
  auto newCap = MixedArray::Capacity(newScale);
  assertx(m_size <= posLimit() && posLimit() <= cap() && cap() <= newCap);
  assertx(SmallSize <= newCap && newCap <= MaxSize);
  assertx(m_size <= newCap);
  auto oldAd = arrayData();
  dropImmCopy();
  if (m_size > 0 && !oldAd->cowCheck()) {
    m_arr = MixedArray::Grow(oldAd, newScale, false);
    decRefArr(oldAd);
  } else {
    // For cases where m_size is zero or the buffer's refcount is
    // greater than 1, call resizeHelper().
    resizeHelper(newCap);
  }
  assertx(canMutateBuffer());
  assertx(m_immCopy.isNull());
}

void HashCollection::compact() {
  assertx(isDensityTooLow());
  dropImmCopy();
  if (!arrayData()->cowCheck()) {
    // MixedArray::compact can only handle cases where the buffer's
    // refcount is 1.
    arrayData()->compact(false);
  } else {
    // For cases where the buffer's refcount is greater than 1, call
    // resizeHelper().
    resizeHelper(cap());
  }
  assertx(canMutateBuffer());
  assertx(m_immCopy.isNull());
  assertx(!isDensityTooLow());
}

void HashCollection::shrink(uint32_t oldCap /* = 0 */) {
  assertx(isCapacityTooHigh() && (oldCap == 0 || oldCap < cap()));
  assertx(m_size <= posLimit() && posLimit() <= cap());
  dropImmCopy();
  uint32_t newCap;
  if (oldCap != 0) {
    // If an old capacity was specified, use that
    newCap = oldCap;
    // .. unless the old capacity is too small, in which case we use the
    // smallest capacity that is large enough to hold the current number
    // of elements.
    for (; newCap < m_size; newCap <<= 1) {}
    assertx(newCap == computeMaxElms(folly::nextPowTwo<uint64_t>(newCap) - 1));
  } else {
    if (m_size == 0 && nextKI() == 0) {
      decRefArr(m_arr);
      m_arr = staticEmptyDictArrayAsMixed();
      return;
    }
    // If no old capacity was provided, we compute the largest capacity
    // where m_size/cap() is less than or equal to 0.5 for good hysteresis
    size_t doubleSz = size_t(m_size) * 2;
    uint32_t capThreshold = (doubleSz < size_t(MaxSize)) ? doubleSz : MaxSize;
    for (newCap = SmallSize * 2; newCap < capThreshold; newCap <<= 1) {}
  }
  assertx(SmallSize <= newCap && newCap <= MaxSize);
  assertx(m_size <= newCap);
  auto* oldAd = arrayData();
  if (!oldAd->cowCheck()) {
    // If the buffer's refcount is 1, we can teleport the elements
    // to a new buffer
    auto oldBuf = data();
    auto oldUsed = posLimit();
    auto oldNextKI = nextKI();
    m_arr = MixedArray::asMixed(MixedArray::MakeReserveDict(newCap));
    m_arr->m_size = m_size;
    auto data = this->data();
    auto table = hashTab();
    auto table_mask = tableMask();
    setPosLimit(m_size);
    setNextKI(oldNextKI);
    for (uint32_t frPos = 0, toPos = 0; toPos < m_size; ++toPos, ++frPos) {
      frPos = skipTombstonesNoBoundsCheck(frPos, oldUsed, oldBuf);
      copyElm(oldBuf[frPos], data[toPos]);
      *findForNewInsert(table, table_mask, data[toPos].probe()) = toPos;
    }
    oldAd->setZombie();
    decRefArr(oldAd);
  } else {
    // For cases where the buffer's refcount is greater than 1, call
    // resizeHelper()
    resizeHelper(newCap);
  }
  assertx(canMutateBuffer());
  assertx(m_immCopy.isNull());
  assertx(!isCapacityTooHigh() || newCap == oldCap);
}

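// allocElmFront() makes element slot 0 available by shifting every existing
// element up one slot and bumping each live hash-table entry, then returns a
// reference to the freshly opened slot 0 for the caller to fill in.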
HashCollection::Elm& HashCollection::allocElmFront(MixedArray::Inserter ei) {
  assertx(MixedArray::isValidIns(ei) && !MixedArray::isValidPos(*ei));
  assertx(m_size <= posLimit() && posLimit() < cap());
  // Move the existing elements to make element slot 0 available.
  memmove(data() + 1, data(), posLimit() * sizeof(Elm));
  incPosLimit();
  // Update the hashtable to reflect the fact that everything was
  // moved over one position
  auto* hash = hashTab();
  auto* hashEnd = hash + arrayData()->hashSize();
  for (; hash != hashEnd; ++hash) {
    if (validPos(*hash)) {
      ++(*hash);
    }
  }
  // Set the hash entry we found to point to element slot 0.
  (*ei) = 0;
  // Adjust m_pos so that it points at this new first element.
  arrayData()->m_pos = 0;
  // Adjust size to reflect that we're adding a new element.
  incSize();
  // Store the value into element slot 0.
  return data()[0];
}

/**
 * preSort() does an initial pass to do some preparatory work before the
 * sort algorithm runs. For sorts that use builtin comparators, the types
 * of values are also observed during this first pass. By observing the
 * types during this initial pass, we can often use a specialized
 * comparator and avoid performing type checks during the actual sort.
 */
template <typename AccessorT>
SortFlavor HashCollection::preSort(const AccessorT& acc, bool checkTypes) {
  assertx(m_size > 0);
  if (!checkTypes && !hasTombstones()) {
    // No need to loop over the elements, we're done
    return GenericSort;
  }
  auto* start = data();
  auto* end = data() + posLimit();
  bool allInts UNUSED = true;
  bool allStrs UNUSED = true;
  for (;;) {
    if (checkTypes) {
      while (!isTombstone(start)) {
        allInts = (allInts && acc.isInt(*start));
        allStrs = (allStrs && acc.isStr(*start));
        ++start;
        if (start == end) {
          goto done;
        }
      }
    } else {
      while (!isTombstone(start)) {
        ++start;
        if (start == end) {
          goto done;
        }
      }
    }
    --end;
    if (start == end) {
      goto done;
    }
    while (isTombstone(end)) {
      --end;
      if (start == end) {
        goto done;
      }
    }
    copyElm(*end, *start);
  }
  done:
  setPosLimit(start - data());
  // The logic above possibly moved elements and tombstones around
  // within the buffer, so we make sure m_pos is not pointing at
  // garbage by resetting it. The logic above ensures that the first
  // slot is not a tombstone, so it's safe to set m_pos to 0.
  arrayData()->m_pos = 0;
  assertx(!hasTombstones());
  if (checkTypes) {
    return allStrs ? StringSort : allInts ? IntegerSort : GenericSort;
  } else {
    return GenericSort;
  }
}

/**
 * postSort() runs after the sort has been performed. For c_Map,
 * postSort() handles rebuilding the hash.
 */
void HashCollection::postSort() {  // Must provide the nothrow guarantee
  arrayData()->postSort(false);
}

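// The sort entry points below share their bodies via macros: SORT_BODY runs
// preSort() to pick a flavor (string, integer, or generic), CALL_SORT and
// SORT_CASE_BLOCK dispatch on that flavor plus sort_flags/ascending to a
// specialized comparator, and postSort() rebuilds the hash table afterwards.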
#define SORT_CASE(flag, cmp_type, acc_type) \
  case flag: { \
    if (ascending) { \
      cmp_type##Compare<acc_type, flag, true> comp; \
      HPHP::Sort::sort(data(), data() + m_size, comp); \
    } else { \
      cmp_type##Compare<acc_type, flag, false> comp; \
      HPHP::Sort::sort(data(), data() + m_size, comp); \
    } \
    break; \
  }
#define SORT_CASE_BLOCK(cmp_type, acc_type) \
  switch (sort_flags) { \
    default: /* fall through to SORT_REGULAR case */ \
    SORT_CASE(SORT_REGULAR, cmp_type, acc_type) \
    SORT_CASE(SORT_NUMERIC, cmp_type, acc_type) \
    SORT_CASE(SORT_STRING, cmp_type, acc_type) \
    SORT_CASE(SORT_LOCALE_STRING, cmp_type, acc_type) \
    SORT_CASE(SORT_NATURAL, cmp_type, acc_type) \
    SORT_CASE(SORT_NATURAL_CASE, cmp_type, acc_type) \
  }
#define CALL_SORT(acc_type) \
  if (flav == StringSort) { \
    SORT_CASE_BLOCK(StrElm, acc_type) \
  } else if (flav == IntegerSort) { \
    SORT_CASE_BLOCK(IntElm, acc_type) \
  } else { \
    SORT_CASE_BLOCK(Elm, acc_type) \
  }
#define SORT_BODY(acc_type) \
  do { \
    SortFlavor flav = preSort<acc_type>(acc_type(), true); \
    try { \
      CALL_SORT(acc_type); \
    } catch (...) { \
      /* make sure the map is left in a consistent state */ \
      postSort(); \
      throw; \
    } \
    postSort(); \
  } while(0)

void HashCollection::asort(int sort_flags, bool ascending) {
  if (m_size <= 1) return;
  mutate();
  SORT_BODY(AssocValAccessor<HashCollection::Elm>);
}

void HashCollection::ksort(int sort_flags, bool ascending) {
  if (m_size <= 1) return;
  mutate();
  SORT_BODY(AssocKeyAccessor<HashCollection::Elm>);
}

#undef SORT_CASE
#undef SORT_CASE_BLOCK
#undef CALL_SORT
#undef SORT_BODY

#define USER_SORT_BODY(acc_type) \
  do { \
    CallCtx ctx; \
    CallerFrame cf; \
    vm_decode_function(cmp_function, cf(), false, ctx); \
    if (!ctx.func) { \
      return false; \
    } \
    preSort<acc_type>(acc_type(), false); \
    SCOPE_EXIT { \
      /* make sure the map is left in a consistent state */ \
      postSort(); \
    }; \
    ElmUCompare<acc_type> comp; \
    comp.ctx = &ctx; \
    HPHP::Sort::sort(data(), data() + m_size, comp); \
    return true; \
  } while (0)

bool HashCollection::uasort(const Variant& cmp_function) {
  if (m_size <= 1) return true;
  mutate();
  USER_SORT_BODY(AssocValAccessor<HashCollection::Elm>);
}

bool HashCollection::uksort(const Variant& cmp_function) {
  if (m_size <= 1) return true;
  mutate();
  USER_SORT_BODY(AssocKeyAccessor<HashCollection::Elm>);
}

#undef USER_SORT_BODY

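// mutateImpl() implements copy-on-write for the backing MixedArray: it drops
// any immutable copy and, if the buffer still has other owners after that,
// clones it so this collection holds a uniquely owned buffer that is safe to
// modify in place.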
void HashCollection::mutateImpl() {
  assertx(arrayData()->hasMultipleRefs());
  dropImmCopy();
  if (canMutateBuffer()) {
    return;
  }
  auto* oldAd = arrayData();
  m_arr = MixedArray::asMixed(MixedArray::Copy(oldAd));
  assertx(oldAd->hasMultipleRefs());
  oldAd->decRefCount();
}

/////////////////////////////////////////////////////////////////////////////
}