2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_HASH_H_
18 #define incl_HPHP_HASH_H_
23 #include "hphp/util/portability.h"
25 #if defined __x86_64__
26 # if (!defined USE_SSECRC)
40 bool IsSSEHashSupported();
42 ///////////////////////////////////////////////////////////////////////////////
44 typedef int32_t strhash_t
;
45 const strhash_t STRHASH_MASK
= 0x7fffffff;
46 const strhash_t STRHASH_MSB
= 0x80000000;
48 inline long long hash_int64(long long key
) {
49 // "64 bit Mix Functions", from Thomas Wang's "Integer Hash Function."
50 // http://www.concentric.net/~ttwang/tech/inthash.htm
51 key
= (~key
) + (key
<< 21); // key = (key << 21) - key - 1;
52 key
= key
^ ((unsigned long long)key
>> 24);
53 key
= (key
+ (key
<< 3)) + (key
<< 8); // key * 265
54 key
= key
^ ((unsigned long long)key
>> 14);
55 key
= (key
+ (key
<< 2)) + (key
<< 4); // key * 21
56 key
= key
^ ((unsigned long long)key
>> 28);
57 key
= key
+ (key
<< 31);
58 return key
< 0 ? -key
: key
;
61 inline long long hash_int64_pair(long long k1
, long long k2
) {
62 // Shift the first key, so (a,b) hashes somewhere other than (b,a)
63 return (hash_int64(k1
) << 1) ^ hash_int64(k2
);
66 namespace MurmurHash3
{
67 ///////////////////////////////////////////////////////////////////////////////
68 // The following code is based on MurmurHash3:
69 // http://code.google.com/p/smhasher/wiki/MurmurHash3
71 // The case-insensitive version converts lowercase characters to uppercase
72 // under the assumption that character data are 7-bit ASCII. This should work
73 // as identifiers usually only contain alphanumeric characters and the
74 // underscore. Although PHP allows higher ASCII characters (> 127) in an
75 // identifier, they should be very rare, and do not change the correctness.
77 #define ROTL64(x,y) rotl64(x,y)
78 #define BIG_CONSTANT(x) (x##LLU)
80 ALWAYS_INLINE
uint64_t rotl64(uint64_t x
, int8_t r
) {
81 return (x
<< r
) | (x
>> (64 - r
));
84 template <bool caseSensitive
>
85 ALWAYS_INLINE
uint64_t getblock64(const uint64_t *p
, int i
) {
86 uint64_t block
= p
[i
];
88 block
&= 0xdfdfdfdfdfdfdfdfLLU
; // a-z => A-Z
93 template <bool caseSensitive
>
94 ALWAYS_INLINE
uint8_t getblock8(const uint8_t *p
, int i
) {
97 block
&= 0xdfU
; // a-z => A-Z
102 //-----------------------------------------------------------------------------
103 // Finalization mix - force all bits of a hash block to avalanche
104 ALWAYS_INLINE
uint64_t fmix64(uint64_t k
) {
106 k
*= BIG_CONSTANT(0xff51afd7ed558ccd);
108 k
*= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
113 // Optimized for 64-bit architectures. MurmurHash3 also implements a 128-bit
114 // hash that is optimized for 32-bit architectures (omitted here).
115 template <bool caseSensitive
>
116 ALWAYS_INLINE
void hash128(const void *key
, size_t len
, uint64_t seed
,
118 const uint8_t *data
= (const uint8_t *)key
;
119 const size_t nblocks
= len
/ 16;
122 const uint64_t c1
= BIG_CONSTANT(0x87c37b91114253d5);
123 const uint64_t c2
= BIG_CONSTANT(0x4cf5ad432745937f);
127 const uint64_t *blocks
= (const uint64_t *)(data
);
128 for(size_t i
= 0; i
< nblocks
; i
++)
130 uint64_t k1
= getblock64
<caseSensitive
>(blocks
,i
*2+0);
131 uint64_t k2
= getblock64
<caseSensitive
>(blocks
,i
*2+1);
132 k1
*= c1
; k1
= ROTL64(k1
,31); k1
*= c2
; h1
^= k1
;
133 h1
= ROTL64(h1
,27); h1
+= h2
; h1
= h1
*5+0x52dce729;
134 k2
*= c2
; k2
= ROTL64(k2
,33); k2
*= c1
; h2
^= k2
;
135 h2
= ROTL64(h2
,31); h2
+= h1
; h2
= h2
*5+0x38495ab5;
140 const uint8_t *tail
= (const uint8_t*)(data
+ nblocks
*16);
145 case 15: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 14)) << 48;
146 case 14: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 13)) << 40;
147 case 13: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 12)) << 32;
148 case 12: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 11)) << 24;
149 case 11: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 10)) << 16;
150 case 10: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 9)) << 8;
151 case 9: k2
^= uint64_t(getblock8
<caseSensitive
>(tail
, 8)) << 0;
152 k2
*= c2
; k2
= ROTL64(k2
,33); k2
*= c1
; h2
^= k2
;
154 case 8: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 7)) << 56;
155 case 7: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 6)) << 48;
156 case 6: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 5)) << 40;
157 case 5: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 4)) << 32;
158 case 4: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 3)) << 24;
159 case 3: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 2)) << 16;
160 case 2: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 1)) << 8;
161 case 1: k1
^= uint64_t(getblock8
<caseSensitive
>(tail
, 0)) << 0;
162 k1
*= c1
; k1
= ROTL64(k1
,31); k1
*= c2
; h1
^= k1
;
167 h1
^= len
; h2
^= len
;
175 ((uint64_t*)out
)[0] = h1
;
176 ((uint64_t*)out
)[1] = h2
;
181 ///////////////////////////////////////////////////////////////////////////////
182 } // namespace MurmurHash3
184 // Four functions for hashing: hash_string_(cs|i)(_unsafe)?.
185 // cs: case-sensitive;
186 // i: case-insensitive;
187 // unsafe: safe for strings aligned at 8-byte boundary;
189 // Fallback versions uses CRC hash when supported, and use MurmurHash otherwise.
190 strhash_t
hash_string_cs_fallback(const char *arKey
, uint32_t nKeyLength
);
191 strhash_t
hash_string_i_fallback(const char *arKey
, uint32_t nKeyLength
);
193 #if FACEBOOK && defined USE_SSECRC
195 strhash_t
hash_string_cs_unsafe(const char *arKey
, uint32_t nKeyLength
);
196 strhash_t
hash_string_i_unsafe(const char *arKey
, uint32_t nKeyLength
);
197 strhash_t
hash_string_cs(const char *arKey
, uint32_t nKeyLength
);
198 strhash_t
hash_string_i(const char *arKey
, uint32_t nKeyLength
);
202 inline strhash_t
hash_string_cs(const char *arKey
, uint32_t nKeyLength
) {
203 return hash_string_cs_fallback(arKey
, nKeyLength
);
207 strhash_t
hash_string_cs_unsafe(const char *arKey
, uint32_t nKeyLength
) {
208 return hash_string_cs_fallback(arKey
, nKeyLength
);
211 inline strhash_t
hash_string_i(const char *arKey
, uint32_t nKeyLength
) {
212 return hash_string_i_fallback(arKey
, nKeyLength
);
216 strhash_t
hash_string_i_unsafe(const char *arKey
, uint32_t nKeyLength
) {
217 return hash_string_i_fallback(arKey
, nKeyLength
);
222 inline strhash_t
hash_string(const char *arKey
, uint32_t nKeyLength
) {
223 return hash_string_i(arKey
, nKeyLength
);
226 inline strhash_t
hash_string_unsafe(const char *arKey
, uint32_t nKeyLength
) {
227 return hash_string_i_unsafe(arKey
, nKeyLength
);
231 * We probably should get rid of this, so to detect code generation errors,
232 * where a binary string is treated as a NULL-terminated literal. Do we ever
233 * allow binary strings as array keys or symbol names?
235 inline strhash_t
hash_string(const char *arKey
) {
236 return hash_string(arKey
, strlen(arKey
));
239 inline strhash_t
hash_string_i(const char *arKey
) {
240 return hash_string_i(arKey
, strlen(arKey
));
243 // This function returns true and sets the res parameter if arKey
244 // is a non-empty string that matches one of the following conditions:
245 // 1) The string is "0".
246 // 2) The string starts with a non-zero digit, followed by at most
247 // 18 more digits, and is less than or equal to 2^63 - 1.
248 // 3) The string starts with a negative sign, followed by a non-zero
249 // digit, followed by at most 18 more digits, and is greater than
250 // or equal to -2^63.
251 inline bool is_strictly_integer(const char* arKey
, size_t nKeyLength
,
253 if ((unsigned char)(arKey
[0] - '-') > ('9' - '-'))
255 if (nKeyLength
<= 19 ||
256 (arKey
[0] == '-' && nKeyLength
== 20)) {
257 unsigned long long num
= 0;
260 if (arKey
[0] == '-') {
263 // The string "-" is NOT strictly an integer
266 // A string that starts with "-0" is NOT strictly an integer
269 } else if (arKey
[0] == '0') {
270 // The string "0" is strictly an integer
271 if (nKeyLength
== 1) {
275 // A string that starts with "0" followed by at least one digit
276 // is NOT strictly an integer
280 for (; i
< nKeyLength
; ++i
) {
281 if (arKey
[i
] >= '0' && arKey
[i
] <= '9') {
282 num
= 10*num
+ (arKey
[i
] - '0');
290 if (num
<= 0x7FFFFFFFFFFFFFFFULL
||
291 (neg
&& num
== 0x8000000000000000ULL
)) {
292 res
= neg
? 0 - num
: (long long)num
;
301 ///////////////////////////////////////////////////////////////////////////////
305 // The following functions are implemented in ASM directly for x86_64.
307 HPHP::strhash_t
hash_string_cs_crc(const char*, uint32_t);
308 HPHP::strhash_t
hash_string_i_crc(const char*, uint32_t);
309 HPHP::strhash_t
hash_string_cs_unaligned_crc(const char*, uint32_t);
310 HPHP::strhash_t
hash_string_i_unaligned_crc(const char*, uint32_t);
311 HPHP::strhash_t
g_hashHelper_crc(const HPHP::StringData
*);
315 #endif // incl_HPHP_HASH_H_