2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
19 #include "hphp/runtime/base/hash-table-x64.h"
20 #include "hphp/runtime/base/string-data.h"
21 #include "hphp/runtime/base/tv-uncounted.h"
23 // NvGetStr is implemented in assembly in hash-table-x64.S, additional macros
24 // are defined for various offsets in hash-table-x64.h
25 // Types inheriting from HashTable should add this macro to statically verify
26 // the offsets are correct.
27 #ifdef USE_X86_STRING_HELPERS
29 #define HASH_TABLE_CHECK_OFFSETS(ArrayType, ElmType) \
30 static_assert(ArrayType::dataOff() == ArrayType ## _DATA, ""); \
31 static_assert(ArrayType::scaleOff() == ArrayType ## _SCALE, ""); \
32 static_assert(ElmType::keyOff() == ElmType ## _KEY, ""); \
33 static_assert(ElmType::hashOff() == ElmType ## _HASH, ""); \
34 static_assert(ElmType::dataOff() == ElmType ## _DATA, ""); \
35 static_assert(ElmType::typeOff() == ElmType ## _TYPE, ""); \
36 static_assert(sizeof(ElmType) == ElmType ## _QUADWORDS * 8, "");
39 #define HASH_TABLE_CHECK_OFFSETS(ArrayType, ElmType)
44 ALWAYS_INLINE
bool validPos(int32_t pos
) {
48 ALWAYS_INLINE
bool validPos(ssize_t pos
) {
54 struct HashTableCommon
{
// Load factor scaler. With S elements and power-of-2 capacity C, the
// table grows once S > C - C/LoadScale. LoadScale == 2 gives a 0.5
// load factor, 4 gives 0.75, 8 gives 0.875; powers of two keep the
// division a shift.
static constexpr uint32_t LoadScale = 4;

// Sentinel hash-table entries; valid element indices are >= 0.
static constexpr int32_t Empty = -1;
static constexpr int32_t Tombstone = -2;

// Smallest table geometry: scale 1 => 4 hash slots, 3 element slots.
// Valid range: [2..32].
static constexpr uint32_t LgSmallScale = 0;
static constexpr uint32_t SmallScale = uint32_t{1} << LgSmallScale;
static constexpr uint32_t SmallHashSize = SmallScale * 4;
static constexpr uint32_t SmallMask = SmallHashSize - 1; // == 3
static constexpr uint32_t SmallSize = SmallScale * 3;

// Largest table geometry.
static constexpr uint64_t MaxHashSize = uint64_t{1} << 32;
static constexpr uint32_t MaxMask = MaxHashSize - 1;
static constexpr uint32_t MaxSize = MaxMask - MaxMask / LoadScale;
static constexpr uint32_t MaxMakeSize = SmallSize * 4;
static constexpr uint32_t MaxScale = MaxHashSize / LoadScale;
// Number of int32_t hash slots for a given scale (4 per scale unit).
constexpr static uint32_t HashSize(uint32_t scale) {
  return scale * 4;
}
// Number of element slots for a given scale (3 per scale unit).
constexpr static uint32_t Capacity(uint32_t scale) {
  return scale * 3;
}
// Probe mask for a given scale; the hash size (4 * scale) is a power
// of two, so the mask is that size minus one.
constexpr static uint32_t Mask(uint32_t scale) {
  return (scale << 2) - 1;
}
82 ALWAYS_INLINE
uint32_t capacity() const { return Capacity(m_scale
); }
83 ALWAYS_INLINE
uint32_t mask() const { return Mask(m_scale
); }
84 ALWAYS_INLINE
uint32_t scale() const { return m_scale
; }
86 static constexpr size_t computeMaxElms(uint32_t mask
) {
87 return mask
- mask
/ LoadScale
;
// Smallest scale whose capacity can accommodate n elements.
static uint32_t computeScaleFromSize(uint32_t n);
// Size of the hash-slot region; presumably HashSize(m_scale) --
// confirm against the implementation in hash-table-inl.h.
size_t hashSize() const;
// Lookup modes for findImpl(); the mode selects findImpl's return
// type via its std::conditional chain, so enumerator order matters.
enum FindType {
  Lookup,        // locate an existing element's position
  Exists,        // boolean membership test
  Insert,        // find a slot; caller may insert
  InsertUpdate,  // find a slot; caller may insert or update
  Remove         // locate an element for removal
};
96 explicit Inserter(int32_t *p
) : ei(p
) {}
97 Inserter
& operator=(const int32_t pos
) {
101 bool isValid() const {
102 return ei
!= nullptr;
104 Inserter
& operator*() {
107 explicit operator int32_t() const {
116 return validPos(elmIdx
);
119 int32_t elmIdx
{Empty
};
123 bool isValidIns(Inserter e
) {
128 bool isValidPos(Inserter e
) {
129 assertx(isValidIns(e
));
130 return (int32_t)(*e
) >= 0;
// Initialize a fresh table of 4 * scale hash entries; presumably
// fills them with Empty -- confirm in the implementation.
static void InitHash(int32_t* hash, uint32_t scale);
// Copy 4 * scale hash entries from `from` to `to` -- see the
// implementation for the exact copy strategy.
static void CopyHash(int32_t* to, const int32_t* from, uint32_t scale);
137 // Some of these are packed into qword-sized unions so we can
138 // combine stores during initialization. (gcc won't do it on its own.)
141 uint32_t m_scale
; // size-class equal to 1/4 table size
142 uint32_t m_used
; // Number of used elements (values or tombstones)
144 uint64_t m_scale_used
;
148 ///////////////////////////////////////////////////////////////////////////////
150 template <typename ArrayType
, typename ElmType
>
151 struct HashTable
: HashTableCommon
{
153 using hash_t
= typename
Elm::hash_t
;
155 /////////////////////////////////////////////////////////////////////////////
156 // Offset calculations.
157 /////////////////////////////////////////////////////////////////////////////
159 static ALWAYS_INLINE Elm
* Data(const HashTable
* ht
) {
161 __builtin_unreachable();
163 return const_cast<Elm
*>(
164 reinterpret_cast<Elm
const*>(
165 static_cast<ArrayType
const*>(ht
) + 1
170 ALWAYS_INLINE Elm
* data() const {
174 static ALWAYS_INLINE
int32_t* HashTab(const HashTable
* ht
, uint32_t scale
) {
175 return const_cast<int32_t*>(
176 reinterpret_cast<int32_t const*>(
177 Data(ht
) + static_cast<size_t>(scale
) * 3
182 ALWAYS_INLINE
int32_t* hashTab() const {
183 return HashTab(this, m_scale
);
186 static constexpr ptrdiff_t dataOff() {
187 return sizeof(ArrayType
);
189 static constexpr ptrdiff_t usedOff() {
190 return offsetof(ArrayType
, m_used
);
192 static constexpr ptrdiff_t usedSize() {
193 return sizeof(m_used
);
195 static constexpr ptrdiff_t scaleOff() {
196 return offsetof(ArrayType
, m_scale
);
199 static constexpr ptrdiff_t elmOff(uint32_t pos
) {
200 return dataOff() + pos
* sizeof(Elm
);
203 /////////////////////////////////////////////////////////////////////////////
204 // Allocation utilities.
205 /////////////////////////////////////////////////////////////////////////////
208 ArrayType
* reqAllocIndex(uint8_t index
) {
209 return static_cast<ArrayType
*>(tl_heap
->objMallocIndex(index
));
213 ArrayType
* staticAlloc(uint32_t scale
, size_t extra
= 0) {
214 extra
= (extra
+ 15) & ~15ull;
215 auto const size
= computeAllocBytes(scale
) + extra
;
216 auto const mem
= RuntimeOption::EvalLowStaticArrays
218 : uncounted_malloc(size
);
219 return reinterpret_cast<ArrayType
*>(reinterpret_cast<char*>(mem
) + extra
);
223 ArrayType
* uncountedAlloc(uint32_t scale
, size_t extra
= 0) {
224 auto const size
= computeAllocBytes(scale
) + extra
;
225 auto const mem
= AllocUncounted(size
);
226 return reinterpret_cast<ArrayType
*>(reinterpret_cast<char*>(mem
) + extra
);
230 constexpr size_t computeAllocBytes(uint32_t scale
) {
231 return sizeof(ArrayType
) +
232 HashSize(scale
) * sizeof(int32_t) +
233 Capacity(scale
) * sizeof(Elm
);
237 size_t computeAllocBytesFromMaxElms(uint32_t maxElms
) {
238 auto const scale
= computeScaleFromSize(maxElms
);
239 return computeAllocBytes(scale
);
243 uint8_t computeIndexFromScale(uint32_t scale
) {
244 return MemoryManager::size2Index(computeAllocBytes(scale
));
247 /////////////////////////////////////////////////////////////////////////////
248 // Non variant interface
249 /////////////////////////////////////////////////////////////////////////////
251 static TypedValue
NvGetInt(const ArrayData
* ad
, int64_t k
);
252 static TypedValue
NvGetStr(const ArrayData
* ad
, const StringData
* k
);
254 // Return the key at the given element, without any refcount ops.
255 static TypedValue
GetPosKey(const ArrayData
* ad
, ssize_t pos
);
258 /////////////////////////////////////////////////////////////////////////////
260 /////////////////////////////////////////////////////////////////////////////
261 void eraseNoCompact(RemovePos pos
) {
262 assertx(pos
.valid());
263 hashTab()[pos
.probeIdx
] = Tombstone
;
265 ElmType
* elms
= data();
266 auto& e
= elms
[pos
.elmIdx
];
271 assertx(m_used
<= capacity());
274 ALWAYS_INLINE
void erase(RemovePos pos
) {
275 array()->ArrayType::eraseNoCompact(pos
);
276 if (array()->m_size
<= m_used
/ 2) array()->compact();
279 /////////////////////////////////////////////////////////////////////////////
280 // findForNewInsertImpl
281 /////////////////////////////////////////////////////////////////////////////
283 * findForNewInsert() CANNOT be used unless the caller can guarantee that
284 * the relevant key is not already present in the array. Otherwise this can
285 * put the array into a bad state; use with caution. The *Warn
286 * version checks for the array becoming too unbalanced because of hash
287 * collisions, and is only called when an array Grow()s.
291 Inserter
findForNewInsert(hash_t h0
) const {
292 return findForNewInsert(hashTab(), mask(), h0
);
296 Inserter
findForNewInsertWarn(hash_t h0
) const {
297 return findForNewInsertWarn(hashTab(), mask(), h0
);
300 Inserter
findForNewInsert(int32_t* table
, size_t mask
,
302 Inserter
findForNewInsertWarn(int32_t* table
, size_t mask
,
306 * Helper routine for inserting elements into a new array
307 * when Grow()ing the array, that also checks for potentially
308 * unbalanced entries because of hash collision.
310 static ArrayType
* InsertCheckUnbalanced(ArrayType
* ad
,
316 /////////////////////////////////////////////////////////////////////////////
318 /////////////////////////////////////////////////////////////////////////////
319 ssize_t
getIterBeginNotEmpty() const;
320 uint32_t iterLimit() const { return m_used
; }
322 static ssize_t
IterBegin(const ArrayData
*);
323 static ssize_t
IterLast(const ArrayData
*);
324 static ssize_t
IterEnd(const ArrayData
*);
325 static ssize_t
IterAdvance(const ArrayData
*, ssize_t pos
);
326 static ssize_t
IterRewind(const ArrayData
*, ssize_t pos
);
328 /////////////////////////////////////////////////////////////////////////////
330 /////////////////////////////////////////////////////////////////////////////
332 ALWAYS_INLINE Elm
* allocElm(Inserter ei
) {
333 assertx(!isValidPos(ei
) && !isFull());
334 assertx(array()->m_size
<= m_used
);
342 ALWAYS_INLINE
int32_t* initHash(uint32_t scale
) {
343 auto table
= HashTab(this, scale
);
344 InitHash(table
, scale
);
348 static ALWAYS_INLINE
bool hitIntKey(const Elm
& e
, int64_t ki
) {
349 assertx(!e
.isInvalid());
350 return e
.intKey() == ki
&& e
.hasIntKey();
353 static ALWAYS_INLINE
bool hitStrKey(const Elm
& e
,
354 const StringData
* ks
,
356 assertx(!e
.isInvalid());
358 * We do not have to check e.hasStrKey() because it is
359 * implicitely done by the check on the hash.
361 return e
.hash() == h
&& (ks
== e
.strKey() || ks
->same(e
.strKey()));
364 /////////////////////////////////////////////////////////////////////////////
366 /////////////////////////////////////////////////////////////////////////////
369 int32_t find(int64_t ki
, hash_t h0
) const {
370 return findImpl
<FindType::Lookup
>(
373 return hitIntKey(e
, ki
);
379 int32_t find(const StringData
* ks
, hash_t h0
) const {
380 return findImpl
<FindType::Lookup
>(
382 [ks
, h0
](const Elm
& e
) {
383 return hitStrKey(e
, ks
, h0
);
389 bool findForExists(int64_t ki
, hash_t h0
) const {
390 return findImpl
<FindType::Exists
>(
393 return hitIntKey(e
, ki
);
399 Inserter
findForInsert(int64_t ki
, hash_t h0
) const {
400 return findImpl
<FindType::Insert
>(
403 return hitIntKey(e
, ki
);
409 Inserter
findForInsert(const StringData
* ks
, hash_t h0
) const {
410 return findImpl
<FindType::Insert
>(
412 [ks
, h0
](const Elm
& e
) {
413 return hitStrKey(e
, ks
, h0
);
419 Inserter
findForInsertUpdate(int64_t ki
, hash_t h0
) const {
420 return findImpl
<FindType::InsertUpdate
>(
423 return hitIntKey(e
, ki
);
429 Inserter
findForInsertUpdate(const StringData
* ks
, hash_t h0
) const {
430 return findImpl
<FindType::InsertUpdate
>(
432 [ks
, h0
](const Elm
& e
) {
433 return hitStrKey(e
, ks
, h0
);
439 auto findForRemove(int64_t ki
, hash_t h0
) const {
440 return findImpl
<FindType::Remove
>(
443 return hitIntKey(e
, ki
);
449 auto findForRemove(const StringData
* ks
, hash_t h0
) const {
450 return findImpl
<FindType::Remove
>(
452 [ks
, h0
](const Elm
& e
) {
453 return hitStrKey(e
, ks
, h0
);
458 template <typename Hit
> ALWAYS_INLINE
460 typename
std::enable_if
<!std::is_integral
<Hit
>::value
&&
461 !std::is_pointer
<Hit
>::value
, hash_t
>::type h0
,
463 return findImpl
<FindType::Remove
>(h0
, hit
);
467 template <FindType type
, typename Hit
>
468 typename
std::conditional
<
469 type
== HashTableCommon::FindType::Lookup
,
471 typename
std::conditional
<
472 type
== HashTableCommon::FindType::Remove
,
473 typename
HashTableCommon::RemovePos
,
474 typename
std::conditional
<
475 type
== HashTableCommon::FindType::Exists
,
477 typename
HashTableCommon::Inserter
480 >::type
findImpl(hash_t h0
, Hit hit
) const;
482 /////////////////////////////////////////////////////////////////////////////
483 // Iteration helpers.
484 /////////////////////////////////////////////////////////////////////////////
485 ssize_t
getIterBegin() const;
486 ssize_t
getIterLast() const;
487 ssize_t
getIterEnd() const;
489 ssize_t
iter_advance_helper(ssize_t next_pos
) const;
491 ssize_t
nextElm(Elm
* elms
, ssize_t ei
) const;
492 ssize_t
prevElm(Elm
* elms
, ssize_t ei
) const;
493 ssize_t
nextElm(ssize_t ei
) const;
495 /////////////////////////////////////////////////////////////////////////////
497 /////////////////////////////////////////////////////////////////////////////
500 ArrayType
* asArrayType(ArrayData
* ad
) {
501 assertx(ad
->isVanillaDict() || ad
->isVanillaKeyset());
502 auto a
= static_cast<ArrayType
*>(ad
);
503 assertx(a
->checkInvariants());
507 const ArrayType
* asArrayType(const ArrayData
* ad
) {
508 assertx(ad
->isVanillaDict() || ad
->isVanillaKeyset());
509 auto a
= static_cast<const ArrayType
*>(ad
);
510 assertx(a
->checkInvariants());
514 ALWAYS_INLINE ArrayType
* array() {
515 return static_cast<ArrayType
*>(this);
517 ALWAYS_INLINE
const ArrayType
* array() const {
518 return static_cast<ArrayType
const*>(this);
525 #include "hphp/runtime/base/hash-table-inl.h"