/*
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#ifndef incl_HPHP_HASH_TABLE_H_
#define incl_HPHP_HASH_TABLE_H_

#include "hphp/runtime/base/hash-table-x64.h"
#include "hphp/runtime/base/string-data.h"

// NvGetStr is implemented in assembly in hash-table-x64.S; additional macros
// are defined for various offsets in hash-table-x64.h.
// Types inheriting from HashTable should add this macro to statically verify
// that the offsets are correct.
#if defined(__SSE4_2__) && defined(NO_M_DATA) && !defined(NO_HWCRC) && \
#define HASH_TABLE_CHECK_OFFSETS(ArrayType, ElmType)                     \
  static_assert(ArrayType::dataOff() == ArrayType ## _DATA, "");         \
  static_assert(ArrayType::scaleOff() == ArrayType ## _SCALE, "");       \
  static_assert(ElmType::keyOff() == ElmType ## _KEY, "");               \
  static_assert(ElmType::hashOff() == ElmType ## _HASH, "");             \
  static_assert(ElmType::dataOff() == ElmType ## _DATA, "");             \
  static_assert(sizeof(ElmType) == ElmType ## _QUADWORDS * 8, "");
#else
#define HASH_TABLE_CHECK_OFFSETS(ArrayType, ElmType)
#endif
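
// For example, a concrete array type (type names here are illustrative)
// would place
//   HASH_TABLE_CHECK_OFFSETS(MixedArray, MixedArrayElm)
// after its definition so the offsets used by the assembly are verified at
// compile time.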

ALWAYS_INLINE bool validPos(int32_t pos) {
  return pos >= 0;
}

ALWAYS_INLINE bool validPos(ssize_t pos) {
  return pos >= 0;
}

struct HashTableCommon {
  // Load factor scaler. If S is the # of elements, C is the
  // power-of-2 capacity, and L=LoadScale, we grow when S > C-C/L.
  // So 2 gives 0.5 load factor, 4 gives 0.75 load factor, 8 gives
  // 0.875 load factor. Use powers of 2 to enable shift-divide.
  static constexpr uint32_t LoadScale = 4;
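  // For example, with LoadScale = 4 and a hash capacity of C = 32 slots,
  // growth is triggered once S > 32 - 32/4 = 24 elements (0.75 load factor).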

  // Element index, with special values < 0 used for hash tables.
  static constexpr int32_t Empty     = -1;
  static constexpr int32_t Tombstone = -2;

  // Use a minimum of a 4-element hash table. Valid range: [2..32]
  static constexpr uint32_t LgSmallScale = 0;
  static constexpr uint32_t SmallScale = 1 << LgSmallScale;
  static constexpr uint32_t SmallHashSize = SmallScale * 4;
  static constexpr uint32_t SmallMask = SmallHashSize - 1; // 3
  static constexpr uint32_t SmallSize = SmallScale * 3;

  static constexpr uint64_t MaxHashSize = uint64_t(1) << 32;
  static constexpr uint32_t MaxMask = MaxHashSize - 1;
  static constexpr uint32_t MaxSize = MaxMask - MaxMask / LoadScale;
  static constexpr uint32_t MaxMakeSize = 4 * SmallSize;
  static constexpr uint32_t MaxScale = MaxHashSize / LoadScale;

  constexpr static uint32_t HashSize(uint32_t scale) { return 4 * scale; }
  constexpr static uint32_t Capacity(uint32_t scale) { return 3 * scale; }
  constexpr static uint32_t Mask(uint32_t scale) { return 4 * scale - 1; }
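  // At the minimum scale of 1 these give HashSize(1) == SmallHashSize == 4,
  // Capacity(1) == SmallSize == 3, and Mask(1) == SmallMask == 3.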

  ALWAYS_INLINE uint32_t capacity() const { return Capacity(m_scale); }
  ALWAYS_INLINE uint32_t mask() const { return Mask(m_scale); }
  ALWAYS_INLINE uint32_t scale() const { return m_scale; }

  static constexpr size_t computeMaxElms(uint32_t mask) {
    return mask - mask / LoadScale;
  }

  static uint32_t computeScaleFromSize(uint32_t n);
  size_t hashSize() const;

  enum FindType { Lookup, Exists, Insert, InsertUpdate, Remove };

  struct Inserter {
    explicit Inserter(int32_t* p) : ei(p) {}
    Inserter& operator=(const int32_t pos) {
      *ei = pos;
      return *this;
    }
    bool isValid() const {
      return ei != nullptr;
    }
    Inserter& operator*() {
      return *this;
    }
    explicit operator int32_t() const {
      return *ei;
    }
  private:
    int32_t* ei;
  };

  bool isValidIns(Inserter e) {
    return e.isValid();
  }

  bool isValidPos(Inserter e) {
    assertx(isValidIns(e));
    return (int32_t)(*e) >= 0;
  }

  static void InitHash(int32_t* hash, uint32_t scale);
  static void CopyHash(int32_t* to, const int32_t* from, uint32_t scale);

  // Some of these are packed into qword-sized unions so we can
  // combine stores during initialization. (gcc won't do it on its own.)
  union {
    struct {
      uint32_t m_scale;   // size-class equal to 1/4 table size
      uint32_t m_used;    // Number of used elements (values or tombstones)
    };
    uint64_t m_scale_used;
  };
};

///////////////////////////////////////////////////////////////////////////////

template <typename ArrayType, typename ElmType>
struct HashTable : HashTableCommon {
  using Elm    = ElmType;
  using hash_t = typename Elm::hash_t;

  /////////////////////////////////////////////////////////////////////////////
  // Offset calculations.
  /////////////////////////////////////////////////////////////////////////////

  static ALWAYS_INLINE Elm* Data(const HashTable* ht) {
    if (ht == nullptr) {
      __builtin_unreachable();
    }
    return const_cast<Elm*>(
      reinterpret_cast<Elm const*>(
        static_cast<ArrayType const*>(ht) + 1
      )
    );
  }

  ALWAYS_INLINE Elm* data() const {
    return Data(this);
  }

  static ALWAYS_INLINE int32_t* HashTab(const HashTable* ht, uint32_t scale) {
    return const_cast<int32_t*>(
      reinterpret_cast<int32_t const*>(
        Data(ht) + static_cast<size_t>(scale) * 3
      )
    );
  }

  ALWAYS_INLINE int32_t* hashTab() const {
    return HashTab(this, m_scale);
  }

  static constexpr ptrdiff_t dataOff() {
    return sizeof(ArrayType);
  }
  static constexpr ptrdiff_t usedOff() {
    return offsetof(ArrayType, m_used);
  }
  static constexpr ptrdiff_t scaleOff() {
    return offsetof(ArrayType, m_scale);
  }
  static constexpr ptrdiff_t elmOff(uint32_t pos) {
    return dataOff() + pos * sizeof(Elm);
  }

  /////////////////////////////////////////////////////////////////////////////
  // Allocation utilities.
  /////////////////////////////////////////////////////////////////////////////

  ArrayType* reqAlloc(uint32_t scale) {
    auto const allocBytes = computeAllocBytes(scale);
    return static_cast<ArrayType*>(tl_heap->objMalloc(allocBytes));
  }

  ArrayType* staticAlloc(uint32_t scale) {
    auto const allocBytes = computeAllocBytes(scale);
    return static_cast<ArrayType*>(RuntimeOption::EvalLowStaticArrays ?
                                   low_malloc_data(allocBytes) :
                                   malloc(allocBytes));
  }

  constexpr size_t computeAllocBytes(uint32_t scale) {
    return sizeof(ArrayType) +
           HashSize(scale) * sizeof(int32_t) +
           Capacity(scale) * sizeof(Elm);
  }

  size_t heapSize() const {
    return computeAllocBytes(m_scale);
  }

  size_t computeAllocBytesFromMaxElms(uint32_t maxElms) {
    auto const scale = computeScaleFromSize(maxElms);
    return computeAllocBytes(scale);
  }
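
  // Layout implied by Data(), HashTab(), and computeAllocBytes(): the
  // ArrayType header is followed by Capacity(scale) == 3*scale Elm slots,
  // then by HashSize(scale) == 4*scale int32_t hash entries.
  //
  //   +-----------+----------------+--------------------+
  //   | ArrayType | Elm[3 * scale] | int32_t[4 * scale] |
  //   +-----------+----------------+--------------------+
  //   ^ this       ^ Data(this)     ^ HashTab(this, scale)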

  /////////////////////////////////////////////////////////////////////////////
  // Non-variant interface.
  /////////////////////////////////////////////////////////////////////////////

  static member_rval::ptr_u NvGetInt(const ArrayData* ad, int64_t k);
  static member_rval::ptr_u NvGetStr(const ArrayData* ad, const StringData* k);

  static member_rval RvalInt(const ArrayData* ad, int64_t k) {
    return member_rval { ad, NvGetInt(ad, k) };
  }

  static member_rval RvalStr(const ArrayData* ad, const StringData* k) {
    return member_rval { ad, NvGetStr(ad, k) };
  }

  static Cell NvGetKey(const ArrayData* ad, ssize_t pos);

  /////////////////////////////////////////////////////////////////////////////
  // findForNewInsertImpl
  /////////////////////////////////////////////////////////////////////////////

  /*
   * findForNewInsert() CANNOT be used unless the caller can guarantee that
   * the relevant key is not already present in the array. Otherwise this can
   * put the array into a bad state; use with caution. The *Warn
   * version checks for the array becoming too unbalanced because of hash
   * collisions, and is only called when an array Grow()s.
   */

  Inserter findForNewInsert(hash_t h0) const {
    return findForNewInsert(hashTab(), mask(), h0);
  }

  Inserter findForNewInsertWarn(hash_t h0) const {
    return findForNewInsertWarn(hashTab(), mask(), h0);
  }

  Inserter findForNewInsert(int32_t* table, size_t mask, hash_t h0) const;
  Inserter findForNewInsertWarn(int32_t* table, size_t mask, hash_t h0) const;
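
  // Sketch of the intended use (illustrative only, not the actual Grow()
  // code): during a rehash, every surviving element may be re-inserted
  // through findForNewInsertWarn() because the freshly initialized table
  // cannot already contain its key.
  //
  //   for (uint32_t i = 0; i < oldUsed; ++i) {
  //     auto const& e = elms[i];
  //     if (e.isInvalid()) continue;                       // skip tombstones
  //     *findForNewInsertWarn(table, mask, e.hash()) = i;  // claim a hash slot
  //   }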

  /////////////////////////////////////////////////////////////////////////////
  /////////////////////////////////////////////////////////////////////////////

  ssize_t getIterBeginNotEmpty() const;

  static ssize_t IterBegin(const ArrayData*);
  static ssize_t IterLast(const ArrayData*);
  static ssize_t IterEnd(const ArrayData*);
  static ssize_t IterAdvance(const ArrayData*, ssize_t pos);
  static ssize_t IterRewind(const ArrayData*, ssize_t pos);

  /////////////////////////////////////////////////////////////////////////////
  /////////////////////////////////////////////////////////////////////////////

  ALWAYS_INLINE Elm* allocElm(Inserter ei) {
    assertx(!isValidPos(ei) && !isFull());
    assertx(array()->m_size <= m_used);
    ++(array()->m_size);
    auto const i = m_used;
    (*ei) = i;
    m_used = i + 1;
    return &data()[i];
  }
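
  // Hedged usage sketch pairing the find/alloc primitives (the Elm
  // initialization step is only indicated, not spelled out):
  //
  //   auto ei = findForInsertUpdate(ki, h);
  //   if (isValidPos(ei)) {
  //     auto& e = data()[(int32_t)(*ei)];   // key already present: update e
  //   } else {
  //     auto e = allocElm(ei);              // claims a slot, records its index via ei
  //     // initialize e's key, hash, and value here
  //   }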

  ALWAYS_INLINE int32_t* initHash(uint32_t scale) {
    auto table = HashTab(this, scale);
    InitHash(table, scale);
    return table;
  }

  // Hash table should be initialized before the header.
  static void InitSmallHash(ArrayType* a);

  static ALWAYS_INLINE bool hitIntKey(const Elm& e, int64_t ki) {
    assertx(!e.isInvalid());
    return e.intKey() == ki && e.hasIntKey();
  }

  static ALWAYS_INLINE bool hitStrKey(const Elm& e,
                                      const StringData* ks,
                                      hash_t h) {
    assertx(!e.isInvalid());
    /*
     * We do not have to check e.hasStrKey() because it is
     * implicitly done by the check on the hash.
     */
    return e.hash() == h && (ks == e.strKey() || ks->same(e.strKey()));
  }

  /////////////////////////////////////////////////////////////////////////////
  /////////////////////////////////////////////////////////////////////////////

  int32_t find(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Lookup>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        },
        [](Elm&) {}
      );
  }

  int32_t find(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::Lookup>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        },
        [](Elm&) {}
      );
  }

  bool findForExists(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Exists>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        },
        [](Elm&) {}
      );
  }

  bool findForExists(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::Exists>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        },
        [](Elm&) {}
      );
  }

  Inserter findForInsert(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Insert>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        },
        [](Elm&) {}
      );
  }

  Inserter findForInsert(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::Insert>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        },
        [](Elm&) {}
      );
  }

  Inserter findForInsertUpdate(int64_t ki, hash_t h0) const {
    return findImpl<FindType::InsertUpdate>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        },
        [](Elm&) {}
      );
  }

  Inserter findForInsertUpdate(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::InsertUpdate>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        },
        [](Elm&) {}
      );
  }

  int32_t findForRemove(int64_t ki, hash_t h0) const {
    return findImpl<FindType::Remove>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        },
        [](Elm&) {}
      );
  }

  int32_t findForRemove(const StringData* ks, hash_t h0) const {
    return findImpl<FindType::Remove>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        },
        [](Elm&) {}
      );
  }

  template <typename Remove> ALWAYS_INLINE
  int32_t findForRemove(int64_t ki, hash_t h0, Remove remove) const {
    return findImpl<FindType::Remove>(
        h0,
        [ki](const Elm& e) {
          return hitIntKey(e, ki);
        },
        remove
      );
  }

  template <typename Remove> ALWAYS_INLINE
  int32_t findForRemove(const StringData* ks, hash_t h0, Remove remove) const {
    return findImpl<FindType::Remove>(
        h0,
        [ks, h0](const Elm& e) {
          return hitStrKey(e, ks, h0);
        },
        remove
      );
  }

  template <typename Hit> ALWAYS_INLINE
  typename std::enable_if<!std::is_integral<Hit>::value
                          && !std::is_pointer<Hit>::value, int32_t>::type
  findForRemove(hash_t h0, Hit hit) const {
    return findImpl<FindType::Remove>(
        h0,
        hit,
        [](Elm&) {}
      );
  }

  template <FindType type, typename Hit, typename Remove>
  typename std::conditional<
    type == HashTableCommon::FindType::Lookup ||
    type == HashTableCommon::FindType::Remove,
    int32_t,
    typename std::conditional<
      type == HashTableCommon::FindType::Exists,
      bool,
      typename HashTableCommon::Inserter
    >::type
  >::type findImpl(hash_t h0, Hit hit, Remove remove) const;
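
  // Concretely, findImpl<FindType::Lookup> and findImpl<FindType::Remove>
  // return an element index (int32_t, negative sentinel on a miss),
  // findImpl<FindType::Exists> returns bool, and the Insert/InsertUpdate
  // instantiations return an Inserter pointing at the hash slot to fill.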

  /////////////////////////////////////////////////////////////////////////////
  // Iteration helpers.
  /////////////////////////////////////////////////////////////////////////////

  ssize_t getIterBegin() const;
  ssize_t getIterLast() const;
  ssize_t getIterEnd() const;

  ssize_t iter_advance_helper(ssize_t next_pos) const;

  ssize_t nextElm(Elm* elms, ssize_t ei) const;
  ssize_t prevElm(Elm* elms, ssize_t ei) const;
  ssize_t nextElm(ssize_t ei) const;

  /////////////////////////////////////////////////////////////////////////////
  /////////////////////////////////////////////////////////////////////////////

  ArrayType* asArrayType(ArrayData* ad) {
    assertx(ad->hasMixedLayout() || ad->isKeyset());
    auto a = static_cast<ArrayType*>(ad);
    assertx(a->checkInvariants());
    return a;
  }

  const ArrayType* asArrayType(const ArrayData* ad) {
    assertx(ad->hasMixedLayout() || ad->isKeyset());
    auto a = static_cast<const ArrayType*>(ad);
    assertx(a->checkInvariants());
    return a;
  }

  ALWAYS_INLINE ArrayType* array() {
    return static_cast<ArrayType*>(this);
  }

  ALWAYS_INLINE const ArrayType* array() const {
    return static_cast<ArrayType const*>(this);
  }
};

#include "hphp/runtime/base/hash-table-inl.h"

#endif // incl_HPHP_HASH_TABLE_H_