fixup yaml to be c++ compliant
[hiphop-php.git] / hphp / util / hash.h
blob3224a12266695cc3dee5784545ad43d06b42ff0f
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_HASH_H_
18 #define incl_HPHP_HASH_H_
20 #include <stdint.h>
21 #include <cstring>
23 #include "hphp/util/util.h"
25 namespace HPHP {
26 ///////////////////////////////////////////////////////////////////////////////
28 typedef int32_t strhash_t;
29 const strhash_t STRHASH_MASK = 0x7fffffff;
30 const strhash_t STRHASH_MSB = 0x80000000;
33 * "64 bit Mix Functions", from Thomas Wang's "Integer Hash Function."
34 * http://www.concentric.net/~ttwang/tech/inthash.htm
36 inline long long hash_int64(long long key) {
37 key = (~key) + (key << 21); // key = (key << 21) - key - 1;
38 key = key ^ ((unsigned long long)key >> 24);
39 key = (key + (key << 3)) + (key << 8); // key * 265
40 key = key ^ ((unsigned long long)key >> 14);
41 key = (key + (key << 2)) + (key << 4); // key * 21
42 key = key ^ ((unsigned long long)key >> 28);
43 key = key + (key << 31);
44 return key < 0 ? -key : key;
47 inline long long hash_int64_pair(long long k1, long long k2) {
48 // Shift the first key, so (a,b) hashes somewhere other than (b,a)
49 return (hash_int64(k1) << 1) ^ hash_int64(k2);
52 namespace MurmurHash3 {
53 ///////////////////////////////////////////////////////////////////////////////
54 // The following code is based on MurmurHash3:
55 // http://code.google.com/p/smhasher/wiki/MurmurHash3
57 // The case-insensitive version converts lowercase characters to uppercase
58 // under the assumption that character data are 7-bit ASCII. This should work
59 // as identifiers usually only contain alphanumeric characters and the
60 // underscore. Although PHP allows higher ASCII characters (> 127) in an
61 // identifier, they should be very rare, and do not change the correctness.
63 // It is tempting to make useHash128 depend on whether the architecture is 32-
64 // or 64-bit, but changing which hash function is used also requires
65 // regenerating system files, so this setting is hardcoded here.
66 const bool useHash128 = true;
68 #define ROTL32(x,y) rotl32(x,y)
69 #define ROTL64(x,y) rotl64(x,y)
70 #define BIG_CONSTANT(x) (x##LLU)
72 ALWAYS_INLINE uint64_t rotl64(uint64_t x, int8_t r) {
73 return (x << r) | (x >> (64 - r));
76 ALWAYS_INLINE uint32_t rotl32(uint32_t x, int8_t r) {
77 return (x << r) | (x >> (32 - r));
80 template <bool caseSensitive>
81 ALWAYS_INLINE uint64_t getblock64(const uint64_t *p, int i) {
82 uint64_t block = p[i];
83 if (!caseSensitive) {
84 block &= 0xdfdfdfdfdfdfdfdfLLU; // a-z => A-Z
86 return block;
89 template <bool caseSensitive>
90 ALWAYS_INLINE uint32_t getblock32(const uint32_t *p, int i) {
91 uint32_t block = p[i];
92 if (!caseSensitive) {
93 block &= 0xdfdfdfdfU; // a-z => A-Z
95 return block;
98 template <bool caseSensitive>
99 ALWAYS_INLINE uint8_t getblock8(const uint8_t *p, int i) {
100 uint8_t block = p[i];
101 if (!caseSensitive) {
102 block &= 0xdfU; // a-z => A-Z
104 return block;
107 //-----------------------------------------------------------------------------
108 // Finalization mix - force all bits of a hash block to avalanche
109 ALWAYS_INLINE uint64_t fmix64(uint64_t k) {
110 k ^= k >> 33;
111 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
112 k ^= k >> 33;
113 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
114 k ^= k >> 33;
115 return k;
118 ALWAYS_INLINE uint32_t fmix32(uint32_t h) {
119 h ^= h >> 16;
120 h *= 0x85ebca6b;
121 h ^= h >> 13;
122 h *= 0xc2b2ae35;
123 h ^= h >> 16;
124 return h;
127 template <bool caseSensitive>
128 ALWAYS_INLINE uint32_t hash32(const void *key, size_t len, uint32_t seed) {
129 const uint8_t *data = (const uint8_t *)key;
130 const size_t nblocks = len / 4;
131 uint32_t h1 = seed;
132 const uint32_t c1 = 0xcc9e2d51;
133 const uint32_t c2 = 0x1b873593;
135 //----------
136 // body
137 const uint32_t *blocks = (const uint32_t *)(data + nblocks*4);
138 for(size_t i = -nblocks; i; i++) {
139 uint32_t k1 = getblock32<caseSensitive>(blocks, i);
140 k1 *= c1;
141 k1 = ROTL32(k1,15);
142 k1 *= c2;
143 h1 ^= k1;
144 h1 = ROTL32(h1,13);
145 h1 = h1*5+0xe6546b64;
148 //----------
149 // tail
150 const uint8_t *tail = (const uint8_t*)(data + nblocks*4);
151 uint32_t k1 = 0;
152 switch(len & 3) {
153 case 3: k1 ^= getblock8<caseSensitive>(tail, 2) << 16;
154 case 2: k1 ^= getblock8<caseSensitive>(tail, 1) << 8;
155 case 1: k1 ^= getblock8<caseSensitive>(tail, 0);
156 k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
159 //----------
160 // finalization
161 h1 ^= len;
162 h1 = fmix32(h1);
164 return h1;
167 // Optimized for 64-bit architectures. MurmurHash3 also implements a 128-bit
168 // hash that is optimized for 32-bit architectures (omitted here).
169 template <bool caseSensitive>
170 ALWAYS_INLINE void hash128(const void *key, size_t len, uint64_t seed,
171 uint64_t out[2]) {
172 const uint8_t *data = (const uint8_t *)key;
173 const size_t nblocks = len / 16;
174 uint64_t h1 = seed;
175 uint64_t h2 = seed;
176 const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
177 const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
179 //----------
180 // body
181 const uint64_t *blocks = (const uint64_t *)(data);
182 for(size_t i = 0; i < nblocks; i++)
184 uint64_t k1 = getblock64<caseSensitive>(blocks,i*2+0);
185 uint64_t k2 = getblock64<caseSensitive>(blocks,i*2+1);
186 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
187 h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
188 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
189 h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
192 //----------
193 // tail
194 const uint8_t *tail = (const uint8_t*)(data + nblocks*16);
195 uint64_t k1 = 0;
196 uint64_t k2 = 0;
197 switch(len & 15)
199 case 15: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 14)) << 48;
200 case 14: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 13)) << 40;
201 case 13: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 12)) << 32;
202 case 12: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 11)) << 24;
203 case 11: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 10)) << 16;
204 case 10: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 9)) << 8;
205 case 9: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 8)) << 0;
206 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
208 case 8: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 7)) << 56;
209 case 7: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 6)) << 48;
210 case 6: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 5)) << 40;
211 case 5: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 4)) << 32;
212 case 4: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 3)) << 24;
213 case 3: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 2)) << 16;
214 case 2: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 1)) << 8;
215 case 1: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 0)) << 0;
216 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
219 //----------
220 // finalization
221 h1 ^= len; h2 ^= len;
222 h1 += h2;
223 h2 += h1;
224 h1 = fmix64(h1);
225 h2 = fmix64(h2);
226 h1 += h2;
227 h2 += h1;
229 ((uint64_t*)out)[0] = h1;
230 ((uint64_t*)out)[1] = h2;
233 #undef ROTL32
234 #undef ROTL64
235 #undef BIG_CONSTANT
236 ///////////////////////////////////////////////////////////////////////////////
237 } // namespace MurmurHash3
239 inline strhash_t hash_string_cs(const char *arKey, int nKeyLength) {
240 if (MurmurHash3::useHash128) {
241 uint64_t h[2];
242 MurmurHash3::hash128<true>(arKey, nKeyLength, 0, h);
243 return strhash_t(h[0] & STRHASH_MASK);
244 } else {
245 uint32_t h = MurmurHash3::hash32<true>(arKey, nKeyLength, 0);
246 return strhash_t(h & STRHASH_MASK);
250 strhash_t hash_string_i(const char *arKey, int nKeyLength);
251 strhash_t hash_string(const char *arKey, int nKeyLength);
253 inline strhash_t hash_string_i_inline(const char *arKey, int nKeyLength) {
254 if (MurmurHash3::useHash128) {
255 uint64_t h[2];
256 MurmurHash3::hash128<false>(arKey, nKeyLength, 0, h);
257 return strhash_t(h[0] & STRHASH_MASK);
258 } else {
259 uint32_t h = MurmurHash3::hash32<false>(arKey, nKeyLength, 0);
260 return strhash_t(h & STRHASH_MASK);
264 inline strhash_t hash_string_inline(const char *arKey, int nKeyLength) {
265 return hash_string_i(arKey, nKeyLength);
269 * We probably should get rid of this, so to detect code generation errors,
270 * where a binary string is treated as a NULL-terminated literal. Do we ever
271 * allow binary strings as array keys or symbol names?
273 inline strhash_t hash_string(const char *arKey) {
274 return hash_string(arKey, strlen(arKey));
276 inline strhash_t hash_string_i(const char *arKey) {
277 return hash_string_i(arKey, strlen(arKey));
280 // This function returns true and sets the res parameter if arKey
281 // is a non-empty string that matches one of the following conditions:
282 // 1) The string is "0".
283 // 2) The string starts with a non-zero digit, followed by at most
284 // 18 more digits, and is less than or equal to 9223372036854775806.
285 // 3) The string starts with a negative sign, followed by a non-zero
286 // digit, followed by at most 18 more digits, and is greater than
287 // or equal to -9223372036854775807.
288 inline bool is_strictly_integer(const char* arKey, size_t nKeyLength,
289 int64_t& res) {
290 if ((unsigned char)(arKey[0] - '-') > ('9' - '-'))
291 return false;
292 if (nKeyLength <= 19 ||
293 (arKey[0] == '-' && nKeyLength == 20)) {
294 unsigned long long num = 0;
295 bool neg = false;
296 unsigned i = 0;
297 if (arKey[0] == '-') {
298 neg = true;
299 i = 1;
300 // The string "-" is NOT strictly an integer
301 if (nKeyLength == 1)
302 return false;
303 // A string that starts with "-0" is NOT strictly an integer
304 if (arKey[1] == '0')
305 return false;
306 } else if (arKey[0] == '0') {
307 // The string "0" is strictly an integer
308 if (nKeyLength == 1) {
309 res = 0;
310 return true;
312 // A string that starts with "0" followed by at least one digit
313 // is NOT strictly an integer
314 return false;
316 bool good = true;
317 for (; i < nKeyLength; ++i) {
318 if (arKey[i] >= '0' && arKey[i] <= '9') {
319 num = 10*num + arKey[i] - '0';
321 else {
322 good = false;
323 break;
326 if (good) {
327 if (num <= 0x7FFFFFFFFFFFFFFFULL ||
328 (neg && num == 0x8000000000000000ULL)) {
329 res = neg ? 0 - num : (long long)num;
330 return true;
334 return false;
337 ///////////////////////////////////////////////////////////////////////////////
340 #endif // incl_HPHP_HASH_H_