hphp/runtime/ext/collections/hash-collection.cpp

   1 #include "hphp/runtime/ext/collections/hash-collection.h"
   2 #include "hphp/runtime/ext/collections/ext_collections-map.h"
   3 #include "hphp/runtime/ext/collections/ext_collections-set.h"
   4 #include "hphp/runtime/base/array-init.h"
   5 #include "hphp/runtime/base/mixed-array.h"
   6 #include "hphp/runtime/base/sort-helpers.h"
   7 #include "hphp/runtime/vm/vm-regs.h"
   8 #include "hphp/util/text-util.h"
   9
  10 namespace HPHP {
  11 /////////////////////////////////////////////////////////////////////////////
  12 // HashCollection
  13
  14 HashCollection::HashCollection(Class* cls, HeaderKind kind, uint32_t cap)
  15   : ObjectData(cls, NoInit{}, collections::objectFlags, kind)
  16   , m_unusedAndSize(0)
  17   , m_arr(cap == 0 ? staticEmptyDictArrayAsMixed() :
  18           MixedArray::asMixed(MixedArray::MakeReserveDict(cap)))
  19 {}
  20
  21 NEVER_INLINE
  22 void HashCollection::throwTooLarge() {
  23   assertx(getClassName().size() == 6);
  24   auto clsName = getClassName().get()->slice();
  25   String msg(130, ReserveString);
  26   auto buf = msg.bufferSlice();
  27   auto sz = snprintf(buf.data(), buf.size() + 1,
  28     "%s object has reached its maximum capacity of %u element "
  29     "slots and does not have room to add a new element",
  30     clsName.data() + 3, // strip "HH\" prefix
  31     MaxSize
  32   );
  33   msg.setSize(std::min<int>(sz, buf.size()));
  34   SystemLib::throwInvalidOperationExceptionObject(msg);
  35 }
  36
  37 NEVER_INLINE
  38 void HashCollection::throwReserveTooLarge() {
  39   assertx(getClassName().size() == 6);
  40   auto clsName = getClassName().get()->slice();
  41   String msg(80, ReserveString);
  42   auto buf = msg.bufferSlice();
  43   auto sz = snprintf(buf.data(), buf.size() + 1,
  44     "%s does not support reserving room for more than %u elements",
  45     clsName.data() + 3, // strip "HH\" prefix
  46     MaxReserveSize
  47   );
  48   msg.setSize(std::min<int>(sz, buf.size()));
  49   SystemLib::throwInvalidOperationExceptionObject(msg);
  50 }
  51
  52 NEVER_INLINE
  53 int32_t* HashCollection::warnUnbalanced(size_t n, int32_t* ei) const {
  54   if (n > size_t(RuntimeOption::MaxArrayChain)) {
  55     raise_error("%s is too unbalanced (%lu)",
  56                 getClassName().data() + 3, // strip "HH\" prefix
  57                 n);
  58   }
  59   return ei;
  60 }
  61
  62 NEVER_INLINE
  63 void HashCollection::warnOnStrIntDup() const {
  64   req::hash_set<int64_t> seenVals;
  65
  66   auto* eLimit = elmLimit();
  67   for (auto* e = firstElm(); e != eLimit; e = nextElm(e, eLimit)) {
  68     int64_t newVal = 0;
  69
  70     if (e->hasIntKey()) {
  71       newVal = e->ikey;
  72     } else {
  73       assertx(e->hasStrKey());
  74       // isStriclyInteger() puts the int value in newVal as a side effect.
  75       if (!e->skey->isStrictlyInteger(newVal)) continue;
  76     }
  77
  78     if (seenVals.find(newVal) != seenVals.end()) {
  79       auto cls = getVMClass()->name()->toCppString();
  80       auto pos = cls.rfind('\\');
  81       if (pos != std::string::npos) {
  82         cls = cls.substr(pos + 1);
  83       }
  84       raise_warning(
  85         "%s::toArray() for a %s containing both int(%" PRId64 ") "
  86         "and string('%" PRId64 "')",
  87         cls.c_str(),
  88         toLower(cls).c_str(),
  89         newVal,
  90         newVal
  91       );
  92
  93       return;
  94     }
  95
  96     seenVals.insert(newVal);
  97   }
  98   // Do nothing if no 'duplicates' were found.
  99 }
 100
 101 Array HashCollection::toArray() {
 102   if (!m_size) {
 103     return empty_array();
 104   }
 105   auto ad = arrayData()->toPHPArray(true);
 106   if (UNLIKELY(ad->size() < m_size)) warnOnStrIntDup();
 107   assertx(m_size);
 108   assertx(ad->m_pos == 0);
 109   return Array::attach(ad);
 110 }
 111
 112 Array HashCollection::toKeysArray() {
 113   PackedArrayInit ai(m_size);
 114   auto* eLimit = elmLimit();
 115   for (auto* e = firstElm(); e != eLimit; e = nextElm(e, eLimit)) {
 116     if (e->hasIntKey()) {
 117       ai.append(int64_t{e->ikey});
 118     } else {
 119       assertx(e->hasStrKey());
 120       ai.append(VarNR(e->skey).tv());
 121     }
 122   }
 123   return ai.toArray();
 124 }
 125
 126 Array HashCollection::toValuesArray() {
 127   PackedArrayInit ai(m_size);
 128   auto* eLimit = elmLimit();
 129   for (auto* e = firstElm(); e != eLimit; e = nextElm(e, eLimit)) {
 130     ai.append(tvAsCVarRef(&e->data));
 131   }
 132   return ai.toArray();
 133 }
 134
 135 void HashCollection::remove(int64_t key) {
 136   mutate();
 137   auto p = findForRemove(key, hash_int64(key));
 138   if (validPos(p)) {
 139     erase(p);
 140   }
 141 }
 142
 143 void HashCollection::remove(StringData* key) {
 144   mutate();
 145   auto p = findForRemove(key, key->hash());
 146   if (validPos(p)) {
 147     erase(p);
 148   }
 149 }
 150
 151 void HashCollection::eraseNoCompact(ssize_t pos) {
 152   assertx(canMutateBuffer());
 153   assertx(validPos(pos) && !isTombstone(pos));
 154   assertx(m_size > 0);
 155   arrayData()->eraseNoCompact(pos);
 156   --m_size;
 157 }
 158
 159 NEVER_INLINE
 160 void HashCollection::makeRoom() {
 161   assertx(isFull());
 162   assertx(posLimit() == cap());
 163   if (LIKELY(!isDensityTooLow())) {
 164     if (UNLIKELY(cap() == MaxSize)) {
 165       throwTooLarge();
 166     }
 167     assertx(scale() > 0);
 168     grow(scale() * 2);
 169   } else {
 170     compact();
 171   }
 172   assertx(canMutateBuffer());
 173   assertx(m_immCopy.isNull());
 174   assertx(!isFull());
 175 }
 176
 177 NEVER_INLINE
 178 void HashCollection::reserve(int64_t sz) {
 179   assertx(m_size <= posLimit() && posLimit() <= cap());
 180   auto cap = static_cast<int64_t>(this->cap());
 181   if (LIKELY(sz > cap)) {
 182     if (UNLIKELY(sz > int64_t(MaxReserveSize))) {
 183       throwReserveTooLarge();
 184     }
 185     // Fast path: The requested capacity is greater than the current capacity.
 186     // Grow to the smallest allowed capacity that is sufficient.
 187     grow(MixedArray::computeScaleFromSize(sz));
 188     assertx(canMutateBuffer());
 189     return;
 190   }
 191   if (LIKELY(!hasTombstones())) {
 192     // Fast path: There are no tombstones and the requested capacity is less
 193     // than or equal to the current capacity.
 194     mutate();
 195     return;
 196   }
 197   if (sz + int64_t(posLimit() - m_size) <= cap || isDensityTooLow()) {
 198     // If we reach this case, then either (1) density is too low (this is
 199     // possible because of methods like retain()), in which case we compact
 200     // to make room and return, OR (2) density is not too low and either
 201     // sz < m_size or there's enough room to add sz-m_size elements, in
 202     // which case we do nothing and return.
 203     compactOrShrinkIfDensityTooLow();
 204     assertx(sz + int64_t(posLimit() - m_size) <= cap);
 205     mutate();
 206     return;
 207   }
 208   // If we reach this case, then density is not too low and sz > m_size and
 209   // there is not enough room to add sz-m_size elements. While would could
 210   // compact to make room, it's better for Hysteresis if we grow capacity
 211   // by 2x instead.
 212   assertx(!isDensityTooLow());
 213   assertx(sz + int64_t(posLimit() - m_size) > cap);
 214   assertx(cap < MaxSize && tableMask() != 0);
 215   auto newScale = scale() * 2;
 216   assertx(sz > 0 && MixedArray::Capacity(newScale) >= sz);
 217   grow(newScale);
 218   assertx(canMutateBuffer());
 219 }
 220
 221 ALWAYS_INLINE
 222 void HashCollection::resizeHelper(uint32_t newCap) {
 223   assertx(newCap >= m_size);
 224   assertx(m_immCopy.isNull());
 225   // Allocate a new ArrayData with the specified capacity and dup
 226   // all the elements (without copying over tombstones).
 227   auto ad = arrayData() == staticEmptyDictArrayAsMixed() ?
 228     MixedArray::asMixed(MixedArray::MakeReserveDict(newCap)) :
 229     MixedArray::CopyReserve(m_arr, newCap);
 230   decRefArr(m_arr);
 231   m_arr = ad;
 232   assertx(canMutateBuffer());
 233 }
 234
 235 void HashCollection::grow(uint32_t newScale) {
 236   auto newCap = MixedArray::Capacity(newScale);
 237   assertx(m_size <= posLimit() && posLimit() <= cap() && cap() <= newCap);
 238   assertx(SmallSize <= newCap && newCap <= MaxSize);
 239   assertx(m_size <= newCap);
 240   auto oldAd = arrayData();
 241   dropImmCopy();
 242   if (m_size > 0 && !oldAd->cowCheck()) {
 243     m_arr = MixedArray::Grow(oldAd, newScale, false);
 244     decRefArr(oldAd);
 245   } else {
 246     // For cases where m_size is zero or the buffer's refcount is
 247     // greater than 1, call resizeHelper().
 248     resizeHelper(newCap);
 249   }
 250   assertx(canMutateBuffer());
 251   assertx(m_immCopy.isNull());
 252 }
 253
 254 void HashCollection::compact() {
 255   assertx(isDensityTooLow());
 256   dropImmCopy();
 257   if (!arrayData()->cowCheck()) {
 258     // MixedArray::compact can only handle cases where the buffer's
 259     // refcount is 1.
 260     arrayData()->compact(false);
 261   } else {
 262     // For cases where the buffer's refcount is greater than 1, call
 263     // resizeHelper().
 264     resizeHelper(cap());
 265   }
 266   assertx(canMutateBuffer());
 267   assertx(m_immCopy.isNull());
 268   assertx(!isDensityTooLow());
 269 }
 270
 271 void HashCollection::shrink(uint32_t oldCap /* = 0 */) {
 272   assertx(isCapacityTooHigh() && (oldCap == 0 || oldCap < cap()));
 273   assertx(m_size <= posLimit() && posLimit() <= cap());
 274   dropImmCopy();
 275   uint32_t newCap;
 276   if (oldCap != 0) {
 277     // If an old capacity was specified, use that
 278     newCap = oldCap;
 279     // .. unless the old capacity is too small, in which case we use the
 280     // smallest capacity that is large enough to hold the current number
 281     // of elements.
 282     for (; newCap < m_size; newCap <<= 1) {}
 283     assertx(newCap == computeMaxElms(folly::nextPowTwo<uint64_t>(newCap) - 1));
 284   } else {
 285     if (m_size == 0 && nextKI() == 0) {
 286       decRefArr(m_arr);
 287       m_arr = staticEmptyDictArrayAsMixed();
 288       return;
 289     }
 290     // If no old capacity was provided, we compute the largest capacity
 291     // where m_size/cap() is less than or equal to 0.5 for good hysteresis
 292     size_t doubleSz = size_t(m_size) * 2;
 293     uint32_t capThreshold = (doubleSz < size_t(MaxSize)) ? doubleSz : MaxSize;
 294     for (newCap = SmallSize * 2; newCap < capThreshold; newCap <<= 1) {}
 295   }
 296   assertx(SmallSize <= newCap && newCap <= MaxSize);
 297   assertx(m_size <= newCap);
 298   auto* oldAd = arrayData();
 299   if (!oldAd->cowCheck()) {
 300     // If the buffer's refcount is 1, we can teleport the elements
 301     // to a new buffer
 302     auto oldBuf = data();
 303     auto oldUsed = posLimit();
 304     auto oldNextKI = nextKI();
 305     m_arr = MixedArray::asMixed(MixedArray::MakeReserveDict(newCap));
 306     m_arr->m_size = m_size;
 307     auto data = this->data();
 308     auto table = hashTab();
 309     auto table_mask = tableMask();
 310     setPosLimit(m_size);
 311     setNextKI(oldNextKI);
 312     for (uint32_t frPos = 0, toPos = 0; toPos < m_size; ++toPos, ++frPos) {
 313       frPos = skipTombstonesNoBoundsCheck(frPos, oldUsed, oldBuf);
 314       copyElm(oldBuf[frPos], data[toPos]);
 315       *findForNewInsert(table, table_mask, data[toPos].probe()) = toPos;
 316     }
 317     oldAd->setZombie();
 318     decRefArr(oldAd);
 319   } else {
 320     // For cases where the buffer's refcount is greater than 1, call
 321     // resizeHelper()
 322     resizeHelper(newCap);
 323   }
 324   assertx(canMutateBuffer());
 325   assertx(m_immCopy.isNull());
 326   assertx(!isCapacityTooHigh() || newCap == oldCap);
 327 }
 328
 329 HashCollection::Elm& HashCollection::allocElmFront(MixedArray::Inserter ei) {
 330   assertx(MixedArray::isValidIns(ei) && !MixedArray::isValidPos(*ei));
 331   assertx(m_size <= posLimit() && posLimit() < cap());
 332   // Move the existing elements to make element slot 0 available.
 333   memmove(data() + 1, data(), posLimit() * sizeof(Elm));
 334   incPosLimit();
 335   // Update the hashtable to reflect the fact that everything was
 336   // moved over one position
 337   auto* hash = hashTab();
 338   auto* hashEnd = hash + arrayData()->hashSize();
 339   for (; hash != hashEnd; ++hash) {
 340     if (validPos(*hash)) {
 341       ++(*hash);
 342     }
 343   }
 344   // Set the hash entry we found to point to element slot 0.
 345   (*ei) = 0;
 346   // Adjust m_pos so that is points at this new first element.
 347   arrayData()->m_pos = 0;
 348   // Adjust size to reflect that we're adding a new element.
 349   incSize();
 350   // Store the value into element slot 0.
 351   return data()[0];
 352 }
 353
 354 /**
 355  * preSort() does an initial pass to do some preparatory work before the
 356  * sort algorithm runs. For sorts that use builtin comparators, the types
 357  * of values are also observed during this first pass. By observing the
 358  * types during this initial pass, we can often use a specialized
 359  * comparator and avoid performing type checks during the actual sort.
 360  */
 361 template <typename AccessorT>
 362 SortFlavor HashCollection::preSort(const AccessorT& acc, bool checkTypes) {
 363   assertx(m_size > 0);
 364   if (!checkTypes && !hasTombstones()) {
 365     // No need to loop over the elements, we're done
 366     return GenericSort;
 367   }
 368   auto* start = data();
 369   auto* end = data() + posLimit();
 370   bool allInts UNUSED = true;
 371   bool allStrs UNUSED = true;
 372   for (;;) {
 373     if (checkTypes) {
 374       while (!isTombstone(start)) {
 375         allInts = (allInts && acc.isInt(*start));
 376         allStrs = (allStrs && acc.isStr(*start));
 377         ++start;
 378         if (start == end) {
 379           goto done;
 380         }
 381       }
 382     } else {
 383       while (!isTombstone(start)) {
 384         ++start;
 385         if (start == end) {
 386           goto done;
 387         }
 388       }
 389     }
 390     --end;
 391     if (start == end) {
 392       goto done;
 393     }
 394     while (isTombstone(end)) {
 395       --end;
 396       if (start == end) {
 397         goto done;
 398       }
 399     }
 400     copyElm(*end, *start);
 401   }
 402   done:
 403   setPosLimit(start - data());
 404   // The logic above possibly moved elements and tombstones around
 405   // within the buffer, so we make sure m_pos is not pointing at
 406   // garbage by resetting it. The logic above ensures that the first
 407   // slot is not a tombstone, so it's safe to set m_pos to 0.
 408   arrayData()->m_pos = 0;
 409   assertx(!hasTombstones());
 410   if (checkTypes) {
 411     return allStrs ? StringSort : allInts ? IntegerSort : GenericSort;
 412   } else {
 413     return GenericSort;
 414   }
 415 }
 416
 417 /**
 418  * postSort() runs after the sort has been performed. For c_Map,
 419  * postSort() handles rebuilding the hash.
 420  */
 421 void HashCollection::postSort() {  // Must provide the nothrow guarantee
 422   arrayData()->postSort(false);
 423 }
 424
 425 #define SORT_CASE(flag, cmp_type, acc_type)                     \
 426   case flag: {                                                  \
 427     if (ascending) {                                            \
 428       cmp_type##Compare<acc_type, flag, true> comp;             \
 429       HPHP::Sort::sort(data(), data() + m_size, comp);          \
 430     } else {                                                    \
 431       cmp_type##Compare<acc_type, flag, false> comp;            \
 432       HPHP::Sort::sort(data(), data() + m_size, comp);          \
 433     }                                                           \
 434     break;                                                      \
 435   }
 436 #define SORT_CASE_BLOCK(cmp_type, acc_type)                     \
 437   switch (sort_flags) {                                         \
 438     default: /* fall through to SORT_REGULAR case */            \
 439       SORT_CASE(SORT_REGULAR, cmp_type, acc_type)               \
 440         SORT_CASE(SORT_NUMERIC, cmp_type, acc_type)             \
 441         SORT_CASE(SORT_STRING, cmp_type, acc_type)              \
 442         SORT_CASE(SORT_LOCALE_STRING, cmp_type, acc_type)       \
 443         SORT_CASE(SORT_NATURAL, cmp_type, acc_type)             \
 444         SORT_CASE(SORT_NATURAL_CASE, cmp_type, acc_type)        \
 445         }
 446 #define CALL_SORT(acc_type)                     \
 447   if (flav == StringSort) {                     \
 448     SORT_CASE_BLOCK(StrElm, acc_type)           \
 449       } else if (flav == IntegerSort) {         \
 450     SORT_CASE_BLOCK(IntElm, acc_type)           \
 451       } else {                                  \
 452     SORT_CASE_BLOCK(Elm, acc_type)              \
 453       }
 454 #define SORT_BODY(acc_type)                                     \
 455   do {                                                          \
 456     SortFlavor flav = preSort<acc_type>(acc_type(), true);      \
 457     try {                                                       \
 458       CALL_SORT(acc_type);                                      \
 459     } catch (...) {                                             \
 460       /* make sure the map is left in a consistent state */     \
 461       postSort();                                               \
 462       throw;                                                    \
 463     }                                                           \
 464     postSort();                                                 \
 465   } while(0)
 466
 467 void HashCollection::asort(int sort_flags, bool ascending) {
 468   if (m_size <= 1) return;
 469   mutate();
 470   SORT_BODY(AssocValAccessor<HashCollection::Elm>);
 471 }
 472
 473 void HashCollection::ksort(int sort_flags, bool ascending) {
 474   if (m_size <= 1) return;
 475   mutate();
 476   SORT_BODY(AssocKeyAccessor<HashCollection::Elm>);
 477 }
 478
 479 #undef SORT_CASE
 480 #undef SORT_CASE_BLOCK
 481 #undef CALL_SORT
 482 #undef SORT_BODY
 483
 484 #define USER_SORT_BODY(acc_type)                                \
 485   do {                                                          \
 486     CallCtx ctx;                                                \
 487     CallerFrame cf;                                             \
 488     vm_decode_function(cmp_function, cf(), false, ctx);         \
 489     if (!ctx.func) {                                            \
 490       return false;                                             \
 491     }                                                           \
 492     preSort<acc_type>(acc_type(), false);                       \
 493     SCOPE_EXIT {                                                \
 494       /* make sure the map is left in a consistent state */     \
 495       postSort();                                               \
 496     };                                                          \
 497     ElmUCompare<acc_type> comp;                                 \
 498     comp.ctx = &ctx;                                            \
 499     HPHP::Sort::sort(data(), data() + m_size, comp);            \
 500     return true;                                                \
 501   } while (0)
 502
 503 bool HashCollection::uasort(const Variant& cmp_function) {
 504   if (m_size <= 1) return true;
 505   mutate();
 506   USER_SORT_BODY(AssocValAccessor<HashCollection::Elm>);
 507 }
 508
 509 bool HashCollection::uksort(const Variant& cmp_function) {
 510   if (m_size <= 1) return true;
 511   mutate();
 512   USER_SORT_BODY(AssocKeyAccessor<HashCollection::Elm>);
 513 }
 514
 515 #undef USER_SORT_BODY
 516
 517 void HashCollection::mutateImpl() {
 518   assertx(arrayData()->hasMultipleRefs());
 519   dropImmCopy();
 520   if (canMutateBuffer()) {
 521     return;
 522   }
 523   auto* oldAd = arrayData();
 524   m_arr = MixedArray::asMixed(MixedArray::Copy(oldAd));
 525   assertx(oldAd->hasMultipleRefs());
 526   oldAd->decRefCount();
 527 }
 528
 529 /////////////////////////////////////////////////////////////////////////////
 530 }