Remove typedef decl errors
[hiphop-php.git] / hphp / util / hash.h
blob099d0f5dbe9ac68d4565c17f4808b3f19e31bdd8
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_HASH_H_
18 #define incl_HPHP_HASH_H_
20 #include <stdint.h>
21 #include <cstring>
23 #include "hphp/util/portability.h"
25 #ifndef FACEBOOK
26 # include "hphp/util/hphp-config.h"
27 #endif
29 #if defined(__x86_64__) && !defined(_MSC_VER)
30 # if (!defined USE_HWCRC)
31 # define USE_HWCRC
32 # endif
33 #elif defined __aarch64__ && defined ENABLE_AARCH64_CRC
34 # if (!defined USE_HWCRC)
35 # define USE_HWCRC
36 # endif
37 #else
38 # undef USE_HWCRC
39 #endif
41 // Killswitch
42 #if NO_HWCRC
43 # undef USE_HWCRC
44 #endif
46 namespace HPHP {
48 bool IsHWHashSupported();
50 ///////////////////////////////////////////////////////////////////////////////
52 using strhash_t = int32_t;
53 using inthash_t = int32_t;
54 constexpr strhash_t STRHASH_MASK = 0x7fffffff;
55 constexpr strhash_t STRHASH_MSB = 0x80000000;
57 inline size_t hash_int64_fallback(uint64_t key) {
58 // "64 bit Mix Functions", from Thomas Wang's "Integer Hash Function."
59 // http://www.concentric.net/~ttwang/tech/inthash.htm
60 key = (~key) + (key << 21); // key = (key << 21) - key - 1;
61 key = key ^ (key >> 24);
62 key = (key + (key << 3)) + (key << 8); // key * 265
63 key = key ^ (key >> 14);
64 key = (key + (key << 2)) + (key << 4); // key * 21
65 key = key ^ (key >> 28);
66 return static_cast<size_t>(static_cast<uint32_t>(key));
69 ALWAYS_INLINE size_t hash_int64(uint64_t k) {
70 #if defined(USE_HWCRC) && defined(__SSE4_2__)
71 size_t h = 0;
72 __asm("crc32q %1, %0\n" : "+r"(h) : "rm"(k));
73 return h;
74 #elif defined(USE_HWCRC) && defined(ENABLE_AARCH64_CRC)
75 size_t res;
76 __asm("crc32cx %w0, wzr, %x1\n" : "=r"(res) : "r"(k));
77 return res;
78 #else
79 return hash_int64_fallback(k);
80 #endif
84 inline size_t hash_int64_pair(uint64_t k1, uint64_t k2) {
85 #if defined(USE_HWCRC) && defined(__SSE4_2__)
86 // crc32 is commutative, so we need to perturb k1 so that (k1, k2) hashes
87 // differently from (k2, k1).
88 k1 += k1;
89 __asm("crc32q %1, %0\n" : "+r" (k1) : "rm"(k2));
90 return k1;
91 #elif defined(USE_HWCRC) && defined(ENABLE_AARCH64_CRC)
92 size_t res;
93 k1 += k1;
94 __asm("crc32cx %w0, %w1, %x2\n" : "=r"(res) : "r"(k2), "r"(k1));
95 return res;
96 #else
97 return (hash_int64(k1) << 1) ^ hash_int64(k2);
98 #endif
102 namespace MurmurHash3 {
103 ///////////////////////////////////////////////////////////////////////////////
104 // The following code is based on MurmurHash3:
105 // http://code.google.com/p/smhasher/wiki/MurmurHash3
107 // The case-insensitive version converts lowercase characters to uppercase
108 // under the assumption that character data are 7-bit ASCII. This should work
109 // as identifiers usually only contain alphanumeric characters and the
110 // underscore. Although PHP allows higher ASCII characters (> 127) in an
111 // identifier, they should be very rare, and do not change the correctness.
113 #define ROTL64(x,y) rotl64(x,y)
114 #define BIG_CONSTANT(x) (x##LLU)
116 ALWAYS_INLINE uint64_t rotl64(uint64_t x, int8_t r) {
117 return (x << r) | (x >> (64 - r));
120 template <bool caseSensitive>
121 ALWAYS_INLINE uint64_t getblock64(const uint64_t *p, int i) {
122 uint64_t block = p[i];
123 if (!caseSensitive) {
124 block &= 0xdfdfdfdfdfdfdfdfLLU; // a-z => A-Z
126 return block;
129 template <bool caseSensitive>
130 ALWAYS_INLINE uint8_t getblock8(const uint8_t *p, int i) {
131 uint8_t block = p[i];
132 if (!caseSensitive) {
133 block &= 0xdfU; // a-z => A-Z
135 return block;
138 //-----------------------------------------------------------------------------
139 // Finalization mix - force all bits of a hash block to avalanche
140 ALWAYS_INLINE uint64_t fmix64(uint64_t k) {
141 k ^= k >> 33;
142 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
143 k ^= k >> 33;
144 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
145 k ^= k >> 33;
146 return k;
149 // Optimized for 64-bit architectures. MurmurHash3 also implements a 128-bit
150 // hash that is optimized for 32-bit architectures (omitted here).
151 template <bool caseSensitive>
152 ALWAYS_INLINE void hash128(const void *key, size_t len, uint64_t seed,
153 uint64_t out[2]) {
154 const uint8_t *data = (const uint8_t *)key;
155 const size_t nblocks = len / 16;
156 uint64_t h1 = seed;
157 uint64_t h2 = seed;
158 const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
159 const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
161 //----------
162 // body
163 const uint64_t *blocks = (const uint64_t *)(data);
164 for(size_t i = 0; i < nblocks; i++)
166 uint64_t k1 = getblock64<caseSensitive>(blocks,i*2+0);
167 uint64_t k2 = getblock64<caseSensitive>(blocks,i*2+1);
168 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
169 h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
170 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
171 h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
174 //----------
175 // tail
176 const uint8_t *tail = (const uint8_t*)(data + nblocks*16);
177 uint64_t k1 = 0;
178 uint64_t k2 = 0;
179 switch(len & 15)
181 case 15: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 14)) << 48;
182 case 14: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 13)) << 40;
183 case 13: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 12)) << 32;
184 case 12: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 11)) << 24;
185 case 11: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 10)) << 16;
186 case 10: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 9)) << 8;
187 case 9: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 8)) << 0;
188 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
190 case 8: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 7)) << 56;
191 case 7: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 6)) << 48;
192 case 6: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 5)) << 40;
193 case 5: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 4)) << 32;
194 case 4: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 3)) << 24;
195 case 3: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 2)) << 16;
196 case 2: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 1)) << 8;
197 case 1: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 0)) << 0;
198 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
201 //----------
202 // finalization
203 h1 ^= len; h2 ^= len;
204 h1 += h2;
205 h2 += h1;
206 h1 = fmix64(h1);
207 h2 = fmix64(h2);
208 h1 += h2;
209 h2 += h1;
211 ((uint64_t*)out)[0] = h1;
212 ((uint64_t*)out)[1] = h2;
215 #undef ROTL64
216 #undef BIG_CONSTANT
217 ///////////////////////////////////////////////////////////////////////////////
218 } // namespace MurmurHash3
220 // Four functions for hashing: hash_string_(cs|i)(_unsafe)?.
221 // cs: case-sensitive;
222 // i: case-insensitive;
223 // unsafe: safe for strings aligned at 8-byte boundary;
225 #if defined USE_HWCRC && (defined __SSE4_2__ || defined ENABLE_AARCH64_CRC)
227 // We will surely use CRC32, these are implemented directly in hash-crc-*.S
228 strhash_t hash_string_cs_unsafe(const char *arKey, uint32_t nKeyLength);
229 strhash_t hash_string_i_unsafe(const char *arKey, uint32_t nKeyLength);
230 strhash_t hash_string_cs(const char *arKey, uint32_t nKeyLength);
231 strhash_t hash_string_i(const char *arKey, uint32_t nKeyLength);
232 #else
234 strhash_t hash_string_cs_fallback(const char*, uint32_t);
235 strhash_t hash_string_i_fallback(const char*, uint32_t);
237 // We may need to do CPUID checks in the fallback versions, only when we are not
238 // sure CRC hash is used.
239 inline strhash_t hash_string_cs(const char *arKey, uint32_t nKeyLength) {
240 return hash_string_cs_fallback(arKey, nKeyLength);
243 inline
244 strhash_t hash_string_cs_unsafe(const char *arKey, uint32_t nKeyLength) {
245 return hash_string_cs_fallback(arKey, nKeyLength);
248 inline strhash_t hash_string_i(const char *arKey, uint32_t nKeyLength) {
249 return hash_string_i_fallback(arKey, nKeyLength);
252 inline
253 strhash_t hash_string_i_unsafe(const char *arKey, uint32_t nKeyLength) {
254 return hash_string_i_fallback(arKey, nKeyLength);
257 #endif
259 // This function returns true and sets the res parameter if arKey
260 // is a non-empty string that matches one of the following conditions:
261 // 1) The string is "0".
262 // 2) The string starts with a non-zero digit, followed by at most
263 // 18 more digits, and is less than or equal to 2^63 - 1.
264 // 3) The string starts with a negative sign, followed by a non-zero
265 // digit, followed by at most 18 more digits, and is greater than
266 // or equal to -2^63.
267 inline bool is_strictly_integer(const char* arKey, size_t nKeyLength,
268 int64_t& res) {
269 if ((unsigned char)(arKey[0] - '-') > ('9' - '-'))
270 return false;
271 if (nKeyLength <= 19 ||
272 (arKey[0] == '-' && nKeyLength == 20)) {
273 unsigned long long num = 0;
274 bool neg = false;
275 unsigned i = 0;
276 if (arKey[0] == '-') {
277 neg = true;
278 i = 1;
279 // The string "-" is NOT strictly an integer
280 if (nKeyLength == 1)
281 return false;
282 // A string that starts with "-0" is NOT strictly an integer
283 if (arKey[1] == '0')
284 return false;
285 } else if (arKey[0] == '0') {
286 // The string "0" is strictly an integer
287 if (nKeyLength == 1) {
288 res = 0;
289 return true;
291 // A string that starts with "0" followed by at least one digit
292 // is NOT strictly an integer
293 return false;
295 bool good = true;
296 for (; i < nKeyLength; ++i) {
297 if (arKey[i] >= '0' && arKey[i] <= '9') {
298 num = 10*num + (arKey[i] - '0');
300 else {
301 good = false;
302 break;
305 if (good) {
306 if (num <= 0x7FFFFFFFFFFFFFFFULL ||
307 (neg && num == 0x8000000000000000ULL)) {
308 res = neg ? 0 - num : (long long)num;
309 return true;
313 return false;
316 struct StringData;
317 ///////////////////////////////////////////////////////////////////////////////
320 #if defined(USE_HWCRC) && !defined(__SSE4_2__) && !defined(_MSC_VER)
321 // The following functions are implemented in ASM directly for x86_64 and ARM
322 extern "C" {
323 HPHP::strhash_t hash_string_cs_crc(const char*, uint32_t);
324 HPHP::strhash_t hash_string_i_crc(const char*, uint32_t);
325 HPHP::strhash_t hash_string_cs_unaligned_crc(const char*, uint32_t);
326 HPHP::strhash_t hash_string_i_unaligned_crc(const char*, uint32_t);
328 #endif
330 #endif // incl_HPHP_HASH_H_