2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_HASH_H_
18 #define incl_HPHP_HASH_H_
23 #include "hphp/util/portability.h"
26 ///////////////////////////////////////////////////////////////////////////////
28 typedef int32_t strhash_t
;
29 const strhash_t STRHASH_MASK
= 0x7fffffff;
30 const strhash_t STRHASH_MSB
= 0x80000000;
33 * "64 bit Mix Functions", from Thomas Wang's "Integer Hash Function."
34 * http://www.concentric.net/~ttwang/tech/inthash.htm
36 inline long long hash_int64(long long key
) {
37 key
= (~key
) + (key
<< 21); // key = (key << 21) - key - 1;
38 key
= key
^ ((unsigned long long)key
>> 24);
39 key
= (key
+ (key
<< 3)) + (key
<< 8); // key * 265
40 key
= key
^ ((unsigned long long)key
>> 14);
41 key
= (key
+ (key
<< 2)) + (key
<< 4); // key * 21
42 key
= key
^ ((unsigned long long)key
>> 28);
43 key
= key
+ (key
<< 31);
44 return key
< 0 ? -key
: key
;
47 inline long long hash_int64_pair(long long k1
, long long k2
) {
48 // Shift the first key, so (a,b) hashes somewhere other than (b,a)
49 return (hash_int64(k1
) << 1) ^ hash_int64(k2
);
52 namespace MurmurHash3
{
53 ///////////////////////////////////////////////////////////////////////////////
54 // The following code is based on MurmurHash3:
55 // http://code.google.com/p/smhasher/wiki/MurmurHash3
57 // The case-insensitive version converts lowercase characters to uppercase
58 // under the assumption that character data are 7-bit ASCII. This should work
59 // as identifiers usually only contain alphanumeric characters and the
60 // underscore. Although PHP allows higher ASCII characters (> 127) in an
61 // identifier, they should be very rare, and do not change the correctness.
63 // It is tempting to make useHash128 depend on whether the architecture is 32-
64 // or 64-bit, but changing which hash function is used also requires
65 // regenerating system files, so this setting is hardcoded here.
66 const bool useHash128
= true;
68 #define ROTL32(x,y) rotl32(x,y)
69 #define ROTL64(x,y) rotl64(x,y)
70 #define BIG_CONSTANT(x) (x##LLU)
72 ALWAYS_INLINE
uint64_t rotl64(uint64_t x
, int8_t r
) {
73 return (x
<< r
) | (x
>> (64 - r
));
76 ALWAYS_INLINE
uint32_t rotl32(uint32_t x
, int8_t r
) {
77 return (x
<< r
) | (x
>> (32 - r
));
80 template <bool caseSensitive
>
81 ALWAYS_INLINE
uint64_t getblock64(const uint64_t *p
, int i
) {
82 uint64_t block
= p
[i
];
84 block
&= 0xdfdfdfdfdfdfdfdfLLU
; // a-z => A-Z
89 template <bool caseSensitive
>
90 ALWAYS_INLINE
uint32_t getblock32(const uint32_t *p
, int i
) {
91 uint32_t block
= p
[i
];
93 block
&= 0xdfdfdfdfU
; // a-z => A-Z
98 template <bool caseSensitive
>
99 ALWAYS_INLINE
uint8_t getblock8(const uint8_t *p
, int i
) {
100 uint8_t block
= p
[i
];
101 if (!caseSensitive
) {
102 block
&= 0xdfU
; // a-z => A-Z
107 //-----------------------------------------------------------------------------
108 // Finalization mix - force all bits of a hash block to avalanche
109 ALWAYS_INLINE
uint64_t fmix64(uint64_t k
) {
111 k
*= BIG_CONSTANT(0xff51afd7ed558ccd);
113 k
*= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
118 ALWAYS_INLINE
uint32_t fmix32(uint32_t h
) {
127 template <bool caseSensitive
>
128 ALWAYS_INLINE
uint32_t hash32(const void *key
, size_t len
, uint32_t seed
) {
129 const uint8_t *data
= (const uint8_t *)key
;
130 const size_t nblocks
= len
/ 4;
132 const uint32_t c1
= 0xcc9e2d51;
133 const uint32_t c2
= 0x1b873593;
137 const uint32_t *blocks
= (const uint32_t *)(data
+ nblocks
*4);
138 for(size_t i
= -nblocks
; i
; i
++) {
139 uint32_t k1
= getblock32
<caseSensitive
>(blocks
, i
);
145 h1
= h1
*5+0xe6546b64;
150 const uint8_t *tail
= (const uint8_t*)(data
+ nblocks
*4);
153 case 3: k1
^= getblock8
<caseSensitive
>(tail
, 2) << 16;
154 case 2: k1
^= getblock8
<caseSensitive
>(tail
, 1) << 8;
155 case 1: k1
^= getblock8
<caseSensitive
>(tail
, 0);
156 k1
*= c1
; k1
= ROTL32(k1
,15); k1
*= c2
; h1
^= k1
;
167 // Optimized for 64-bit architectures. MurmurHash3 also implements a 128-bit
168 // hash that is optimized for 32-bit architectures (omitted here).
169 template <bool caseSensitive
>
170 ALWAYS_INLINE
void hash128(const void *key
, size_t len
, uint64_t seed
,
172 const uint8_t *data
= (const uint8_t *)key
;
173 const size_t nblocks
= len
/ 16;
176 const uint64_t c1
= BIG_CONSTANT(0x87c37b91114253d5);
177 const uint64_t c2
= BIG_CONSTANT(0x4cf5ad432745937f);
181 const uint64_t *blocks
= (const uint64_t *)(data
);
182 for(size_t i
= 0; i
< nblocks
; i
++)
184 uint64_t k1
= getblock64
<caseSensitive
>(blocks
,i
*2+0);
185 uint64_t k2
= getblock64
<caseSensitive
>(blocks
,i
*2+1);
186 k1
*= c1
; k1
= ROTL64(k1
,31); k1
*= c2
; h1
^= k1
;
187 h1
= ROTL64(h1
,27); h1
+= h2
; h1
= h1
*5+0x52dce729;
188 k2
*= c2
; k2
= ROTL64(k2
,33); k2
*= c1
; h2
^= k2
;
189 h2
= ROTL64(h2
,31); h2
+= h1
; h2
= h2
*5+0x38495ab5;
194 const uint8_t *tail
= (const uint8_t*)(data
+ nblocks
*16);
199 case 15: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 14)) << 48;
200 case 14: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 13)) << 40;
201 case 13: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 12)) << 32;
202 case 12: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 11)) << 24;
203 case 11: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 10)) << 16;
204 case 10: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 9)) << 8;
205 case 9: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 8)) << 0;
206 k2
*= c2
; k2
= ROTL64(k2
,33); k2
*= c1
; h2
^= k2
;
208 case 8: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 7)) << 56;
209 case 7: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 6)) << 48;
210 case 6: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 5)) << 40;
211 case 5: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 4)) << 32;
212 case 4: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 3)) << 24;
213 case 3: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 2)) << 16;
214 case 2: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 1)) << 8;
215 case 1: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 0)) << 0;
216 k1
*= c1
; k1
= ROTL64(k1
,31); k1
*= c2
; h1
^= k1
;
221 h1
^= len
; h2
^= len
;
229 ((uint64_t*)out
)[0] = h1
;
230 ((uint64_t*)out
)[1] = h2
;
236 ///////////////////////////////////////////////////////////////////////////////
237 } // namespace MurmurHash3
239 inline strhash_t
hash_string_cs(const char *arKey
, int nKeyLength
) {
240 if (MurmurHash3::useHash128
) {
242 MurmurHash3::hash128
<true>(arKey
, nKeyLength
, 0, h
);
243 return strhash_t(h
[0] & STRHASH_MASK
);
245 uint32_t h
= MurmurHash3::hash32
<true>(arKey
, nKeyLength
, 0);
246 return strhash_t(h
& STRHASH_MASK
);
250 strhash_t
hash_string_i(const char *arKey
, int nKeyLength
);
251 strhash_t
hash_string(const char *arKey
, int nKeyLength
);
253 inline strhash_t
hash_string_i_inline(const char *arKey
, int nKeyLength
) {
254 if (MurmurHash3::useHash128
) {
256 MurmurHash3::hash128
<false>(arKey
, nKeyLength
, 0, h
);
257 return strhash_t(h
[0] & STRHASH_MASK
);
259 uint32_t h
= MurmurHash3::hash32
<false>(arKey
, nKeyLength
, 0);
260 return strhash_t(h
& STRHASH_MASK
);
264 inline strhash_t
hash_string_inline(const char *arKey
, int nKeyLength
) {
265 return hash_string_i(arKey
, nKeyLength
);
269 * We probably should get rid of this, so to detect code generation errors,
270 * where a binary string is treated as a NULL-terminated literal. Do we ever
271 * allow binary strings as array keys or symbol names?
273 inline strhash_t
hash_string(const char *arKey
) {
274 return hash_string(arKey
, strlen(arKey
));
276 inline strhash_t
hash_string_i(const char *arKey
) {
277 return hash_string_i(arKey
, strlen(arKey
));
280 // This function returns true and sets the res parameter if arKey
281 // is a non-empty string that matches one of the following conditions:
282 // 1) The string is "0".
283 // 2) The string starts with a non-zero digit, followed by at most
284 // 18 more digits, and is less than or equal to 2^63 - 1.
285 // 3) The string starts with a negative sign, followed by a non-zero
286 // digit, followed by at most 18 more digits, and is greater than
287 // or equal to -2^63.
288 inline bool is_strictly_integer(const char* arKey
, size_t nKeyLength
,
290 if ((unsigned char)(arKey
[0] - '-') > ('9' - '-'))
292 if (nKeyLength
<= 19 ||
293 (arKey
[0] == '-' && nKeyLength
== 20)) {
294 unsigned long long num
= 0;
297 if (arKey
[0] == '-') {
300 // The string "-" is NOT strictly an integer
303 // A string that starts with "-0" is NOT strictly an integer
306 } else if (arKey
[0] == '0') {
307 // The string "0" is strictly an integer
308 if (nKeyLength
== 1) {
312 // A string that starts with "0" followed by at least one digit
313 // is NOT strictly an integer
317 for (; i
< nKeyLength
; ++i
) {
318 if (arKey
[i
] >= '0' && arKey
[i
] <= '9') {
319 num
= 10*num
+ arKey
[i
] - '0';
327 if (num
<= 0x7FFFFFFFFFFFFFFFULL
||
328 (neg
&& num
== 0x8000000000000000ULL
)) {
329 res
= neg
? 0 - num
: (long long)num
;
337 ///////////////////////////////////////////////////////////////////////////////
340 #endif // incl_HPHP_HASH_H_