From 97d18a8d7a989703e67bbb9d3ab358c6da0a8bb6 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Mon, 6 Apr 2020 09:17:51 -0700 Subject: [PATCH] avoid copying an array when unset a nonexistent key Reviewed By: billschaller Differential Revision: D20635467 fbshipit-source-id: fa68141e67a339663bcd45014d71b9052b162243 --- hphp/runtime/base/hash-table-inl.h | 32 ++++---- hphp/runtime/base/hash-table.h | 93 +++++++++--------------- hphp/runtime/base/mixed-array.cpp | 83 ++++++++++----------- hphp/runtime/base/mixed-array.h | 7 +- hphp/runtime/base/set-array.cpp | 29 ++++---- hphp/runtime/base/set-array.h | 2 +- hphp/runtime/ext/collections/hash-collection.cpp | 12 +-- hphp/runtime/ext/collections/hash-collection.h | 20 ++--- 8 files changed, 121 insertions(+), 157 deletions(-) diff --git a/hphp/runtime/base/hash-table-inl.h b/hphp/runtime/base/hash-table-inl.h index 4b737d1f2cb..46a1a13fb6e 100644 --- a/hphp/runtime/base/hash-table-inl.h +++ b/hphp/runtime/base/hash-table-inl.h @@ -361,22 +361,21 @@ HashTable::findForNewInsertWarn(int32_t* table, // table elements exactly once. template -template +template ALWAYS_INLINE typename std::conditional< - type == HashTableCommon::FindType::Lookup || - type == HashTableCommon::FindType::Remove, + type == HashTableCommon::FindType::Lookup, int32_t, typename std::conditional< - type == HashTableCommon::FindType::Exists, - bool, - typename HashTableCommon::Inserter + type == HashTableCommon::FindType::Remove, + typename HashTableCommon::RemovePos, + typename std::conditional< + type == HashTableCommon::FindType::Exists, + bool, + typename HashTableCommon::Inserter + >::type >::type ->::type HashTable::findImpl(hash_t h0, - Hit hit, - RLambda remove) const { +>::type HashTable::findImpl(hash_t h0, Hit hit) const { static_assert( static_cast(FindType::Lookup) == 0 && static_cast(FindType::Exists) == 1 && @@ -397,20 +396,17 @@ typename std::conditional< assertx(0 <= pos); assertx(pos < capacity()); if (hit(elms[pos])) { - if (type == FindType::Remove) { - remove(elms[pos]); - *ei = Tombstone; - } return std::get(type)>( std::make_tuple(int32_t(pos), true, Inserter(nullptr), - Inserter(ei), int32_t(pos)) + Inserter(ei), + RemovePos{uint32_t(probe), int32_t(pos)}) ); } } else if (pos & 1) { assertx(pos == Empty); return std::get(type)>( - std::make_tuple(int32_t(pos), false, Inserter(ei), - Inserter(ei), int32_t(pos)) + std::make_tuple(int32_t(Empty), false, Inserter(ei), + Inserter(ei), RemovePos{}) ); } diff --git a/hphp/runtime/base/hash-table.h b/hphp/runtime/base/hash-table.h index 7c2a1a422c3..a49010f62ec 100644 --- a/hphp/runtime/base/hash-table.h +++ b/hphp/runtime/base/hash-table.h @@ -112,6 +112,14 @@ struct HashTableCommon { int32_t* ei; }; + struct RemovePos { + bool valid() const { + return validPos(elmIdx); + } + size_t probeIdx{0}; + int32_t elmIdx{Empty}; + }; + static ALWAYS_INLINE bool isValidIns(Inserter e) { return e.isValid(); @@ -334,8 +342,7 @@ public: h0, [ki](const Elm& e) { return hitIntKey(e, ki); - }, - [](Elm&){} + } ); } @@ -345,8 +352,7 @@ public: h0, [ks, h0](const Elm& e) { return hitStrKey(e, ks, h0); - }, - [](Elm&){} + } ); } @@ -356,8 +362,7 @@ public: h0, [ki](const Elm& e) { return hitIntKey(e, ki); - }, - [](Elm&){} + } ); } @@ -367,8 +372,7 @@ public: h0, [ki](const Elm& e) { return hitIntKey(e, ki); - }, - [](Elm&){} + } ); } @@ -378,8 +382,7 @@ public: h0, [ks, h0](const Elm& e) { return hitStrKey(e, ks, h0); - }, - [](Elm&){} + } ); } @@ -389,8 +392,7 @@ public: h0, [ki](const Elm& e) { return hitIntKey(e, ki); - }, - [](Elm&){} + } ); } @@ -400,78 +402,53 @@ public: h0, [ks, h0](const Elm& e) { return hitStrKey(e, ks, h0); - }, - [](Elm&){} + } ); } ALWAYS_INLINE - int32_t findForRemove(int64_t ki, hash_t h0) const { + auto findForRemove(int64_t ki, hash_t h0) const { return findImpl( h0, [ki](const Elm& e) { return hitIntKey(e, ki); - }, - [](Elm&){} + } ); } ALWAYS_INLINE - int32_t findForRemove(const StringData* ks, hash_t h0) const { - return findImpl( - h0, - [ks, h0](const Elm& e) { - return hitStrKey(e, ks, h0); - }, - [](Elm&){} - ); - } - - template ALWAYS_INLINE - int32_t findForRemove(int64_t ki, hash_t h0, Remove remove) const { - return findImpl( - h0, - [ki](const Elm& e) { - return hitIntKey(e, ki); - }, - remove - ); - } - - template ALWAYS_INLINE - int32_t findForRemove(const StringData* ks, hash_t h0, Remove remove) const { + auto findForRemove(const StringData* ks, hash_t h0) const { return findImpl( h0, [ks, h0](const Elm& e) { return hitStrKey(e, ks, h0); - }, - remove - ); + } + ); } template ALWAYS_INLINE - typename std::enable_if::value - && !std::is_pointer::value, int32_t>::type - findForRemove(hash_t h0, Hit hit) const { - return findImpl( - h0, - hit, - [](Elm&){} - ); + auto findForRemove( + typename std::enable_if::value && + !std::is_pointer::value, hash_t>::type h0, + Hit hit) const { + return findImpl(h0, hit); } protected: - template + template typename std::conditional< - type == HashTableCommon::FindType::Lookup || - type == HashTableCommon::FindType::Remove, + type == HashTableCommon::FindType::Lookup, int32_t, typename std::conditional< - type == HashTableCommon::FindType::Exists, - bool, - typename HashTableCommon::Inserter + type == HashTableCommon::FindType::Remove, + typename HashTableCommon::RemovePos, + typename std::conditional< + type == HashTableCommon::FindType::Exists, + bool, + typename HashTableCommon::Inserter + >::type >::type - >::type findImpl(hash_t h0, Hit hit, Remove remove) const; + >::type findImpl(hash_t h0, Hit hit) const; ///////////////////////////////////////////////////////////////////////////// // Iteration helpers. diff --git a/hphp/runtime/base/mixed-array.cpp b/hphp/runtime/base/mixed-array.cpp index c044ca38d15..5ec5611f33a 100644 --- a/hphp/runtime/base/mixed-array.cpp +++ b/hphp/runtime/base/mixed-array.cpp @@ -466,9 +466,12 @@ MixedArray* MixedArray::CopyMixed(const MixedArray& other, auto const ad = mode == AllocMode::Request ? reqAlloc(scale) : staticAlloc(scale, arrprov::tagSize(&other)); + // Copy everything including tombstones. This is a requirement for remove() to + // work correctly, which assumes the position is the same in the original and + // in the copy of the array, in case copying is needed. #ifdef USE_JEMALLOC - // Copy everything including tombstones. We want to copy the elements and - // the hash separately, because the array may not be very full. + // Copy elements and hashes separately, because the array may not be very + // full. assertx(reinterpret_cast(ad) % 16 == 0); assertx(reinterpret_cast(&other) % 16 == 0); // Adding 24 bytes so that we can copy in 32-byte groups. This might @@ -862,34 +865,6 @@ MixedArray::InsertPos MixedArray::insert(StringData* k) { return InsertPos(false, e->data); } -NEVER_INLINE -int32_t MixedArray::findForRemove(int64_t ki, inthash_t h, bool updateNext) { - // all vector methods should work w/out touching the hashtable - return findForRemove(ki, h, - [this, ki, updateNext] (Elm& e) { - assertx(ki == e.ikey); - // Conform to PHP5 behavior - // Hacky: don't removed the unsigned cast, else g++ can optimize away - // the check for == 0x7fff..., since there is no signed int k - // for which k-1 == 0x7fff... - if (((uint64_t)ki == (uint64_t)m_nextKI-1) && - (ki >= 0) && - (ki == 0x7fffffffffffffffLL || updateNext)) { - m_nextKI = ki; - } - } - ); -} - -int32_t MixedArray::findForRemove(const StringData* s, strhash_t h) { - return findForRemove(s, h, - [] (Elm& e) { - decRefStr(e.skey); - e.setIntKey(0, hash_int64(0)); - } - ); -} - bool MixedArray::ExistsInt(const ArrayData* ad, int64_t k) { return asMixed(ad)->findForExists(k, hash_int64(k)); } @@ -1199,16 +1174,29 @@ ArrayData* MixedArray::AddStr(ArrayData* ad, StringData* k, TypedValue v, bool c //============================================================================= // Delete. -void MixedArray::eraseNoCompact(ssize_t pos) { - assertx(validPos(pos)); +void MixedArray::updateNextKI(int64_t removedKi, bool updateNext) { + // Conform to PHP5 behavior + // Hacky: don't removed the unsigned cast, else g++ can optimize away + // the check for == 0x7fff..., since there is no signed int k + // for which k-1 == 0x7fff... + if (((uint64_t)removedKi == (uint64_t)m_nextKI-1) && + (removedKi >= 0) && + (removedKi == 0x7fffffffffffffffLL || updateNext)) { + m_nextKI = removedKi; + } +} + +void MixedArray::eraseNoCompact(RemovePos pos) { + assertx(pos.valid()); + hashTab()[pos.probeIdx] = Tombstone; // If the internal pointer points to this element, advance it. Elm* elms = data(); - if (m_pos == pos) { - m_pos = nextElm(elms, pos); + if (m_pos == pos.elmIdx) { + m_pos = nextElm(elms, pos.elmIdx); } - auto& e = elms[pos]; + auto& e = elms[pos.elmIdx]; auto const oldTV = e.data; if (e.hasStrKey()) { decRefStr(e.skey); @@ -1226,9 +1214,11 @@ void MixedArray::eraseNoCompact(ssize_t pos) { ArrayData* MixedArray::RemoveIntImpl(ArrayData* ad, int64_t k, bool copy) { auto a = asMixed(ad); + auto const pos = a->findForRemove(k, hash_int64(k)); + if (!pos.valid()) return a; if (copy) a = a->copyMixed(); - auto pos = a->findForRemove(k, hash_int64(k), false/*updateNext*/); - if (validPos(pos)) a->erase(pos); + a->updateNextKI(k, false); + a->erase(pos); return a; } @@ -1239,9 +1229,10 @@ ArrayData* MixedArray::RemoveInt(ArrayData* ad, int64_t k) { ArrayData* MixedArray::RemoveStrImpl(ArrayData* ad, const StringData* key, bool copy) { auto a = asMixed(ad); + auto const pos = a->findForRemove(key, key->hash()); + if (!pos.valid()) return a; if (copy) a = a->copyMixed(); - auto pos = a->findForRemove(key, key->hash()); - if (validPos(pos)) a->erase(pos); + a->erase(pos); return a; } @@ -1511,8 +1502,11 @@ ArrayData* MixedArray::Pop(ArrayData* ad, Variant& value) { assertx(!isTombstone(e.data.m_type)); value = tvAsCVarRef(&e.data); auto pos2 = e.hasStrKey() ? a->findForRemove(e.skey, e.hash()) - : a->findForRemove(e.ikey, e.hash(), true); - assertx(pos2 == pos); + : a->findForRemove(e.ikey, e.hash()); + assertx(pos2.elmIdx == pos); + if (!e.hasStrKey()) { + a->updateNextKI(e.ikey, true); + } a->erase(pos2); } else { value = uninit_null(); @@ -1534,8 +1528,11 @@ ArrayData* MixedArray::Dequeue(ArrayData* adInput, Variant& value) { assertx(!isTombstone(e.data.m_type)); value = tvAsCVarRef(&e.data); auto pos2 = e.hasStrKey() ? a->findForRemove(e.skey, e.hash()) - : a->findForRemove(e.ikey, e.hash(), false); - assertx(pos2 == pos); + : a->findForRemove(e.ikey, e.hash()); + if (!e.hasStrKey()) { + a->updateNextKI(e.ikey, false); + } + assertx(pos2.elmIdx == pos); a->erase(pos2); } else { value = uninit_null(); diff --git a/hphp/runtime/base/mixed-array.h b/hphp/runtime/base/mixed-array.h index 84f1f315139..25eb6fda8cf 100644 --- a/hphp/runtime/base/mixed-array.h +++ b/hphp/runtime/base/mixed-array.h @@ -606,8 +606,6 @@ private: InsertPos insert(StringData* k); using HashTable::findForRemove; - int32_t findForRemove(int64_t ki, inthash_t h, bool updateNext); - int32_t findForRemove(const StringData* s, strhash_t h); static ArrayData* RemoveIntImpl(ArrayData*, int64_t, bool); static ArrayData* RemoveStrImpl(ArrayData*, const StringData*, bool); @@ -622,8 +620,9 @@ private: // If "move" is false, this method will inc-ref data. template ArrayData* update(K k, TypedValue data); - void eraseNoCompact(ssize_t pos); - void erase(ssize_t pos) { + void updateNextKI(int64_t removedKey, bool updateNext); + void eraseNoCompact(RemovePos pos); + void erase(RemovePos pos) { eraseNoCompact(pos); if (m_size <= m_used / 2) { // Compact in order to keep elms from being overly sparse. diff --git a/hphp/runtime/base/set-array.cpp b/hphp/runtime/base/set-array.cpp index 15e1c4b77f7..3a8003c0cc1 100644 --- a/hphp/runtime/base/set-array.cpp +++ b/hphp/runtime/base/set-array.cpp @@ -336,16 +336,16 @@ void SetArray::insert(StringData* k, strhash_t h) { } void SetArray::insert(StringData* k) { return insert(k, k->hash()); } -void SetArray::erase(int32_t pos) { - assertx(pos < m_used); - assertx(0 <= pos); - auto const elms = data(); +void SetArray::erase(RemovePos pos) { + assertx(pos.valid() && pos.elmIdx < m_used); + hashTab()[pos.probeIdx] = Tombstone; - if (m_pos == pos) { - m_pos = nextElm(elms, pos); + auto const elms = data(); + if (m_pos == pos.elmIdx) { + m_pos = nextElm(elms, pos.elmIdx); } - auto& elm = elms[pos]; + auto& elm = elms[pos.elmIdx]; assertx(!elm.isInvalid()); tvDecRefGen(&elm.tv); elm.setTombstone(); @@ -617,11 +617,10 @@ ArrayData* SetArray::SetStr(ArrayData*, StringData*, TypedValue) { template ArrayData* SetArray::RemoveImpl(ArrayData* ad, K k, bool copy, SetArrayElm::hash_t h) { auto a = asSet(ad); + auto const pos = a->findForRemove(k, h); + if (!pos.valid()) return a; if (copy) a = a->copySet(); - auto const loc = a->findForRemove(k, h); - if (validPos(loc)) { - a->erase(loc); - } + a->erase(pos); return a; } @@ -691,10 +690,10 @@ ArrayData* SetArray::Pop(ArrayData* ad, Variant& value) { ssize_t pos = a->getIterLast(); tvDup(a->getElm(pos), *value.asTypedValue()); auto const pelm = &a->data()[pos]; - auto loc = a->findForRemove(pelm->hash(), + auto const loc = a->findForRemove(pelm->hash(), [pelm] (const Elm& e) { return &e == pelm; } ); - assertx(loc != Empty); + assertx(loc.valid()); a->erase(loc); } else { value = uninit_null(); @@ -714,10 +713,10 @@ ArrayData* SetArray::Dequeue(ArrayData* ad, Variant& value) { ssize_t pos = a->getIterBegin(); tvDup(a->getElm(pos), *value.asTypedValue()); auto const pelm = &a->data()[pos]; - auto loc = a->findForRemove(pelm->hash(), + auto const loc = a->findForRemove(pelm->hash(), [pelm] (const Elm& e) { return &e == pelm; } ); - assertx(loc != Empty); + assertx(loc.valid()); a->erase(loc); } else { value = uninit_null(); diff --git a/hphp/runtime/base/set-array.h b/hphp/runtime/base/set-array.h index 725438d77fc..036e19cc4af 100644 --- a/hphp/runtime/base/set-array.h +++ b/hphp/runtime/base/set-array.h @@ -282,7 +282,7 @@ private: ssize_t findElm(const Elm& e) const; - void erase(int32_t); + void erase(RemovePos); /* * Append idx at the end of the linked list containing the set diff --git a/hphp/runtime/ext/collections/hash-collection.cpp b/hphp/runtime/ext/collections/hash-collection.cpp index b84f3b770c6..4b30b4f3678 100644 --- a/hphp/runtime/ext/collections/hash-collection.cpp +++ b/hphp/runtime/ext/collections/hash-collection.cpp @@ -128,24 +128,24 @@ Array HashCollection::toValuesArray() { } void HashCollection::remove(int64_t key) { - mutate(); auto p = findForRemove(key, hash_int64(key)); - if (validPos(p)) { + if (p.valid()) { + mutate(); erase(p); } } void HashCollection::remove(StringData* key) { - mutate(); auto p = findForRemove(key, key->hash()); - if (validPos(p)) { + if (p.valid()) { + mutate(); erase(p); } } -void HashCollection::eraseNoCompact(ssize_t pos) { +void HashCollection::eraseNoCompact(MixedArray::RemovePos pos) { assertx(canMutateBuffer()); - assertx(validPos(pos) && !isTombstone(pos)); + assertx(pos.valid() && !isTombstone(pos.elmIdx)); assertx(m_size > 0); arrayData()->eraseNoCompact(pos); --m_size; diff --git a/hphp/runtime/ext/collections/hash-collection.h b/hphp/runtime/ext/collections/hash-collection.h index 09d4277f30a..a44d8021b38 100644 --- a/hphp/runtime/ext/collections/hash-collection.h +++ b/hphp/runtime/ext/collections/hash-collection.h @@ -84,8 +84,8 @@ struct HashCollection : ObjectData { void remove(int64_t key); void remove(StringData* key); - void eraseNoCompact(ssize_t pos); - void erase(ssize_t pos) { + void eraseNoCompact(MixedArray::RemovePos pos); + void erase(MixedArray::RemovePos pos) { eraseNoCompact(pos); compactOrShrinkIfDensityTooLow(); } @@ -423,28 +423,24 @@ struct HashCollection : ObjectData { return m_arr->find(s, h); } - ssize_t findForRemove(int64_t k, inthash_t h) { - assertx(canMutateBuffer()); - return m_arr->findForRemove(k, h, false); + auto findForRemove(int64_t k, inthash_t h) const { + return m_arr->findForRemove(k, h); } - ssize_t findForRemove(const StringData* s, strhash_t h) { - assertx(canMutateBuffer()); + auto findForRemove(const StringData* s, strhash_t h) const { return m_arr->findForRemove(s, h); } - MixedArray::Inserter findForInsert(int64_t ki, - inthash_t h) const { + MixedArray::Inserter findForInsert(int64_t ki, inthash_t h) const { return m_arr->findForInsertUpdate(ki, h); } - MixedArray::Inserter findForInsert(const StringData* s, - strhash_t h) const { + MixedArray::Inserter findForInsert(const StringData* s, strhash_t h) const { return m_arr->findForInsertUpdate(s, h); } MixedArray::Inserter findForNewInsert(int32_t* table, size_t mask, - hash_t h0) const { + hash_t h0) const { return m_arr->findForNewInsert(table, mask, h0); } -- 2.11.4.GIT