hphp/runtime/base/hash-table.h
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#pragma once

#include "hphp/runtime/base/hash-table-x64.h"
#include "hphp/runtime/base/string-data.h"
#include "hphp/runtime/base/tv-uncounted.h"
// NvGetStr is implemented in assembly in hash-table-x64.S; additional macros
// for the various offsets are defined in hash-table-x64.h.
// Types inheriting from HashTable should add this macro to statically verify
// that the offsets are correct.
#ifdef USE_X86_STRING_HELPERS

#define HASH_TABLE_CHECK_OFFSETS(ArrayType, ElmType)                  \
  static_assert(ArrayType::dataOff() == ArrayType ## _DATA, "");      \
  static_assert(ArrayType::scaleOff() == ArrayType ## _SCALE, "");    \
  static_assert(ElmType::keyOff() == ElmType ## _KEY, "");            \
  static_assert(ElmType::hashOff() == ElmType ## _HASH, "");          \
  static_assert(ElmType::dataOff() == ElmType ## _DATA, "");          \
  static_assert(ElmType::typeOff() == ElmType ## _TYPE, "");          \
  static_assert(sizeof(ElmType) == ElmType ## _QUADWORDS * 8, "");

#else
#define HASH_TABLE_CHECK_OFFSETS(ArrayType, ElmType)
#endif
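// For illustration, a concrete array type would instantiate the check like
// this (hypothetical type names, not taken from this header):
//
//   HASH_TABLE_CHECK_OFFSETS(MyDictArray, MyDictElm)
//
// which expands to static_asserts tying the C++ layout to the assembly
// offsets when USE_X86_STRING_HELPERS is defined, and to nothing otherwise.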
namespace HPHP {

ALWAYS_INLINE bool validPos(int32_t pos) {
  return pos >= 0;
}

ALWAYS_INLINE bool validPos(ssize_t pos) {
  return pos >= 0;
}
namespace array {

struct HashTableCommon {
  // Load factor scaler. If S is the # of elements, C is the
  // power-of-2 capacity, and L=LoadScale, we grow when S > C-C/L.
  // So 2 gives 0.5 load factor, 4 gives 0.75 load factor, 8 gives
  // 0.875 load factor. Use powers of 2 to enable shift-divide.
  static constexpr uint32_t LoadScale = 4;
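  // Worked example (illustrative check, not in the original header): with a
  // power-of-2 capacity C = 16 and LoadScale = 4, growth triggers once
  // S > C - C/LoadScale = 16 - 4 = 12, i.e. at a 0.75 load factor.
  static_assert(16 - 16 / LoadScale == 12, "grow at 0.75 load factor");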
  // Element index, with special values < 0 used for hash tables.
  static constexpr int32_t Empty = -1;
  static constexpr int32_t Tombstone = -2;

  // Use a minimum of a 4-element hash table. Valid range: [2..32]
  static constexpr uint32_t LgSmallScale = 0;
  static constexpr uint32_t SmallScale = 1 << LgSmallScale;
  static constexpr uint32_t SmallHashSize = SmallScale * 4;
  static constexpr uint32_t SmallMask = SmallHashSize - 1; // 3
  static constexpr uint32_t SmallSize = SmallScale * 3;
  static constexpr uint64_t MaxHashSize = uint64_t(1) << 32;
  static constexpr uint32_t MaxMask = MaxHashSize - 1;
  static constexpr uint32_t MaxSize = MaxMask - MaxMask / LoadScale;
  static constexpr uint32_t MaxMakeSize = 4 * SmallSize;
  static constexpr uint32_t MaxScale = MaxHashSize / LoadScale;
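  // Illustrative checks (added, not in the original header): the largest
  // hash table has 2^32 slots, so the largest scale is 2^30 and the element
  // limit works out to 3 * 2^30.
  static_assert(MaxScale == uint32_t(1) << 30, "");
  static_assert(MaxSize == uint32_t(3) << 30, "");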
  constexpr static uint32_t HashSize(uint32_t scale) { return 4 * scale; }
  constexpr static uint32_t Capacity(uint32_t scale) { return 3 * scale; }
  constexpr static uint32_t Mask(uint32_t scale) { return 4 * scale - 1; }
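  // Illustrative checks (added): at the minimum scale of 1, these helpers
  // reproduce the Small* constants above: 4 hash slots, mask 3, and room for
  // 3 elements. (Written as plain arithmetic because constexpr member
  // functions cannot be called in a constant expression at this point.)
  static_assert(4 * SmallScale == SmallHashSize, "");
  static_assert(4 * SmallScale - 1 == SmallMask, "");
  static_assert(3 * SmallScale == SmallSize, "");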
  ALWAYS_INLINE uint32_t capacity() const { return Capacity(m_scale); }
  ALWAYS_INLINE uint32_t mask() const { return Mask(m_scale); }
  ALWAYS_INLINE uint32_t scale() const { return m_scale; }

  static constexpr size_t computeMaxElms(uint32_t mask) {
    return mask - mask / LoadScale;
  }

  static uint32_t computeScaleFromSize(uint32_t n);
  size_t hashSize() const;
  enum FindType { Lookup, Exists, Insert, InsertUpdate, Remove };

  struct Inserter {
    explicit Inserter(int32_t* p) : ei(p) {}
    Inserter& operator=(const int32_t pos) {
      *ei = pos;
      return *this;
    }
    bool isValid() const {
      return ei != nullptr;
    }
    Inserter& operator*() {
      return *this;
    }
    explicit operator int32_t() const {
      return *ei;
    }
  private:
    int32_t* ei;
  };

  struct RemovePos {
    bool valid() const {
      return validPos(elmIdx);
    }
    size_t probeIdx{0};
    int32_t elmIdx{Empty};
  };
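  // Note (added for clarity): a RemovePos pairs the hash-table slot to clear
  // (probeIdx) with the index of the element itself (elmIdx); see how
  // HashTable::eraseNoCompact() below consumes both fields.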
  static ALWAYS_INLINE
  bool isValidIns(Inserter e) {
    return e.isValid();
  }

  static ALWAYS_INLINE
  bool isValidPos(Inserter e) {
    assertx(isValidIns(e));
    return (int32_t)(*e) >= 0;
  }

protected:
  static void InitHash(int32_t* hash, uint32_t scale);
  static void CopyHash(int32_t* to, const int32_t* from, uint32_t scale);
  bool isFull() const;

  // Some of these are packed into qword-sized unions so we can
  // combine stores during initialization. (gcc won't do it on its own.)
  union {
    struct {
      uint32_t m_scale; // size-class equal to 1/4 table size
      uint32_t m_used;  // number of used elements (values or tombstones)
    };
    uint64_t m_scale_used;
  };
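  // Illustrative sketch (assumes a little-endian layout; not code from this
  // header): the union lets initialization write both fields in one store,
  // e.g.
  //   m_scale_used = scale | uint64_t(used) << 32;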
};

///////////////////////////////////////////////////////////////////////////////
template <typename ArrayType, typename ElmType>
struct HashTable : HashTableCommon {
  using Elm = ElmType;
  using hash_t = typename Elm::hash_t;

  /////////////////////////////////////////////////////////////////////////////
  // Offset calculations.
  /////////////////////////////////////////////////////////////////////////////
  static ALWAYS_INLINE Elm* Data(const HashTable* ht) {
    if (ht == nullptr) {
      __builtin_unreachable();
    }
    return const_cast<Elm*>(
      reinterpret_cast<Elm const*>(
        static_cast<ArrayType const*>(ht) + 1
      )
    );
  }

  ALWAYS_INLINE Elm* data() const {
    return Data(this);
  }

  static ALWAYS_INLINE int32_t* HashTab(const HashTable* ht, uint32_t scale) {
    return const_cast<int32_t*>(
      reinterpret_cast<int32_t const*>(
        Data(ht) + static_cast<size_t>(scale) * 3
      )
    );
  }

  ALWAYS_INLINE int32_t* hashTab() const {
    return HashTab(this, m_scale);
  }

  static constexpr ptrdiff_t dataOff() {
    return sizeof(ArrayType);
  }
  static constexpr ptrdiff_t usedOff() {
    return offsetof(ArrayType, m_used);
  }
  static constexpr ptrdiff_t usedSize() {
    return sizeof(m_used);
  }
  static constexpr ptrdiff_t scaleOff() {
    return offsetof(ArrayType, m_scale);
  }

  static constexpr ptrdiff_t elmOff(uint32_t pos) {
    return dataOff() + pos * sizeof(Elm);
  }
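  // Memory layout implied by the offsets above (illustrative diagram, added):
  //
  //   +--------------------+ <- the ArrayType object itself
  //   | ArrayType header   |    (m_scale / m_used live in here)
  //   +--------------------+ <- dataOff() == sizeof(ArrayType)
  //   | Elm[3 * scale]     |    element storage, Capacity(scale) slots
  //   +--------------------+ <- HashTab(ht, scale)
  //   | int32_t[4 * scale] |    hash table, HashSize(scale) slots
  //   +--------------------+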
  /////////////////////////////////////////////////////////////////////////////
  // Allocation utilities.
  /////////////////////////////////////////////////////////////////////////////

  static ALWAYS_INLINE
  ArrayType* reqAllocIndex(uint8_t index) {
    return static_cast<ArrayType*>(tl_heap->objMallocIndex(index));
  }

  static ALWAYS_INLINE
  ArrayType* staticAlloc(uint32_t scale, size_t extra = 0) {
    extra = (extra + 15) & ~15ull;
    auto const size = computeAllocBytes(scale) + extra;
    auto const mem = RuntimeOption::EvalLowStaticArrays
      ? lower_malloc(size)
      : uncounted_malloc(size);
    return reinterpret_cast<ArrayType*>(reinterpret_cast<char*>(mem) + extra);
  }

  static ALWAYS_INLINE
  ArrayType* uncountedAlloc(uint32_t scale, size_t extra = 0) {
    auto const size = computeAllocBytes(scale) + extra;
    auto const mem = AllocUncounted(size);
    return reinterpret_cast<ArrayType*>(reinterpret_cast<char*>(mem) + extra);
  }

  static ALWAYS_INLINE
  constexpr size_t computeAllocBytes(uint32_t scale) {
    return sizeof(ArrayType) +
           HashSize(scale) * sizeof(int32_t) +
           Capacity(scale) * sizeof(Elm);
  }
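  // Worked example (illustrative): at scale = 1 this comes to
  //   sizeof(ArrayType) + 4 * sizeof(int32_t) + 3 * sizeof(Elm)
  // i.e. the header, a 4-slot hash table, and storage for 3 elements,
  // matching the layout diagram above.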
  static ALWAYS_INLINE
  size_t computeAllocBytesFromMaxElms(uint32_t maxElms) {
    auto const scale = computeScaleFromSize(maxElms);
    return computeAllocBytes(scale);
  }

  static ALWAYS_INLINE
  uint8_t computeIndexFromScale(uint32_t scale) {
    return MemoryManager::size2Index(computeAllocBytes(scale));
  }

  /////////////////////////////////////////////////////////////////////////////
  // Non-variant interface.
  /////////////////////////////////////////////////////////////////////////////
  static TypedValue NvGetInt(const ArrayData* ad, int64_t k);
  static TypedValue NvGetStr(const ArrayData* ad, const StringData* k);

  // Return the key at the given element, without any refcount ops.
  static TypedValue GetPosKey(const ArrayData* ad, ssize_t pos);

  /////////////////////////////////////////////////////////////////////////////
  // Erase
  /////////////////////////////////////////////////////////////////////////////
  void eraseNoCompact(RemovePos pos) {
    assertx(pos.valid());
    hashTab()[pos.probeIdx] = Tombstone;

    ElmType* elms = data();
    auto& e = elms[pos.elmIdx];
    e.ElmType::erase();
    e.setTombstone();

    --array()->m_size;
    assertx(m_used <= capacity());
  }

  ALWAYS_INLINE void erase(RemovePos pos) {
    array()->ArrayType::eraseNoCompact(pos);
    if (array()->m_size <= m_used / 2) array()->compact();
  }
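  // Note (added for clarity): erase() compacts once at least half of the
  // used slots are tombstones. For example, with m_used == 16, a removal
  // that drops m_size to 8 triggers compact().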
  /////////////////////////////////////////////////////////////////////////////
  // findForNewInsertImpl
  /////////////////////////////////////////////////////////////////////////////

  /*
   * findForNewInsert() CANNOT be used unless the caller can guarantee that
   * the relevant key is not already present in the array. Otherwise this can
   * put the array into a bad state; use with caution. The *Warn
   * version checks for the array becoming too unbalanced because of hash
   * collisions, and is only called when an array Grow()s.
   */
  ALWAYS_INLINE
  Inserter findForNewInsert(hash_t h0) const {
    return findForNewInsert(hashTab(), mask(), h0);
  }

  ALWAYS_INLINE
  Inserter findForNewInsertWarn(hash_t h0) const {
    return findForNewInsertWarn(hashTab(), mask(), h0);
  }

  Inserter findForNewInsert(int32_t* table, size_t mask,
                            hash_t h0) const;
  Inserter findForNewInsertWarn(int32_t* table, size_t mask,
                                hash_t h0) const;
  /*
   * Helper routine for inserting elements into a new array
   * when Grow()ing the array, that also checks for potentially
   * unbalanced entries because of hash collisions.
   */
  static ArrayType* InsertCheckUnbalanced(ArrayType* ad,
                                          int32_t* table,
                                          uint32_t mask,
                                          ElmType* iter,
                                          ElmType* stop);
  /////////////////////////////////////////////////////////////////////////////
  // Iteration
  /////////////////////////////////////////////////////////////////////////////
  ssize_t getIterBeginNotEmpty() const;
  uint32_t iterLimit() const { return m_used; }

  static ssize_t IterBegin(const ArrayData*);
  static ssize_t IterLast(const ArrayData*);
  static ssize_t IterEnd(const ArrayData*);
  static ssize_t IterAdvance(const ArrayData*, ssize_t pos);
  static ssize_t IterRewind(const ArrayData*, ssize_t pos);

  /////////////////////////////////////////////////////////////////////////////
  // Utility methods
  /////////////////////////////////////////////////////////////////////////////
protected:
  ALWAYS_INLINE Elm* allocElm(Inserter ei) {
    assertx(!isValidPos(ei) && !isFull());
    assertx(array()->m_size <= m_used);
    ++(array()->m_size);
    size_t i = m_used;
    (*ei) = i;
    m_used = i + 1;
    return &data()[i];
  }
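  // Illustrative insert sequence (a sketch of how callers combine the
  // pieces; not code from this header):
  //
  //   auto const ei = findForInsertUpdate(key, hash);
  //   if (isValidPos(ei)) {
  //     // key exists: update data()[(int32_t)*ei]
  //   } else {
  //     auto const e = allocElm(ei); // claims slot i, stores i through ei
  //     // initialize e's key, hash, and value here
  //   }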
  ALWAYS_INLINE int32_t* initHash(uint32_t scale) {
    auto table = HashTab(this, scale);
    InitHash(table, scale);
    return table;
  }

  static ALWAYS_INLINE bool hitIntKey(const Elm& e, int64_t ki) {
    assertx(!e.isInvalid());
    return e.intKey() == ki && e.hasIntKey();
  }

  static ALWAYS_INLINE bool hitStrKey(const Elm& e,
                                      const StringData* ks,
                                      hash_t h) {
    assertx(!e.isInvalid());
    /*
     * We do not have to check e.hasStrKey() because it is
     * implicitly done by the check on the hash.
     */
    return e.hash() == h && (ks == e.strKey() || ks->same(e.strKey()));
  }
  /////////////////////////////////////////////////////////////////////////////
  // findImpls
  /////////////////////////////////////////////////////////////////////////////
public:
  ALWAYS_INLINE
  int32_t find(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Lookup>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        }
    );
  }

  ALWAYS_INLINE
  int32_t find(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::Lookup>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        }
    );
  }

  ALWAYS_INLINE
  bool findForExists(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Exists>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        }
    );
  }

  ALWAYS_INLINE
  Inserter findForInsert(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Insert>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        }
    );
  }

  ALWAYS_INLINE
  Inserter findForInsert(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::Insert>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        }
    );
  }

  ALWAYS_INLINE
  Inserter findForInsertUpdate(int64_t ki, hash_t h0) const {
    return findImpl<FindType::InsertUpdate>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        }
    );
  }

  ALWAYS_INLINE
  Inserter findForInsertUpdate(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::InsertUpdate>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        }
    );
  }

  ALWAYS_INLINE
  auto findForRemove(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Remove>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        }
    );
  }

  ALWAYS_INLINE
  auto findForRemove(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::Remove>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        }
    );
  }

  template <typename Hit> ALWAYS_INLINE
  auto findForRemove(
      typename std::enable_if<!std::is_integral<Hit>::value &&
                              !std::is_pointer<Hit>::value, hash_t>::type h0,
      Hit hit) const {
    return findImpl<FindType::Remove>(h0, hit);
  }
protected:
  template <FindType type, typename Hit>
  typename std::conditional<
    type == HashTableCommon::FindType::Lookup,
    int32_t,
    typename std::conditional<
      type == HashTableCommon::FindType::Remove,
      typename HashTableCommon::RemovePos,
      typename std::conditional<
        type == HashTableCommon::FindType::Exists,
        bool,
        typename HashTableCommon::Inserter
      >::type
    >::type
  >::type findImpl(hash_t h0, Hit hit) const;
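  // Summary of the nested std::conditional above (added for clarity):
  // findImpl's return type depends on the FindType template argument:
  //
  //   Lookup               -> int32_t  (element index, or a negative value)
  //   Remove               -> RemovePos
  //   Exists               -> bool
  //   Insert, InsertUpdate -> Inserter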
  /////////////////////////////////////////////////////////////////////////////
  // Iteration helpers.
  /////////////////////////////////////////////////////////////////////////////
  ssize_t getIterBegin() const;
  ssize_t getIterLast() const;
  ssize_t getIterEnd() const;

  ssize_t iter_advance_helper(ssize_t next_pos) const;

  ssize_t nextElm(Elm* elms, ssize_t ei) const;
  ssize_t prevElm(Elm* elms, ssize_t ei) const;
  ssize_t nextElm(ssize_t ei) const;

  /////////////////////////////////////////////////////////////////////////////
  // Downcast helpers
  /////////////////////////////////////////////////////////////////////////////
private:
  static ALWAYS_INLINE
  ArrayType* asArrayType(ArrayData* ad) {
    assertx(ad->isVanillaDict() || ad->isVanillaKeyset());
    auto a = static_cast<ArrayType*>(ad);
    assertx(a->checkInvariants());
    return a;
  }
  static ALWAYS_INLINE
  const ArrayType* asArrayType(const ArrayData* ad) {
    assertx(ad->isVanillaDict() || ad->isVanillaKeyset());
    auto a = static_cast<const ArrayType*>(ad);
    assertx(a->checkInvariants());
    return a;
  }

  ALWAYS_INLINE ArrayType* array() {
    return static_cast<ArrayType*>(this);
  }
  ALWAYS_INLINE const ArrayType* array() const {
    return static_cast<ArrayType const*>(this);
  }
};
} // namespace array
} // namespace HPHP

#include "hphp/runtime/base/hash-table-inl.h"