Move serialize functions to variable-serializer.cpp
[hiphop-php.git] / hphp / util / hash.h
blob77a421bc9119f517f5094f8ebb854ce04eb43b32
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_HASH_H_
18 #define incl_HPHP_HASH_H_
20 #include <stdint.h>
21 #include <cstring>
23 #include "hphp/util/portability.h"
25 #if defined __x86_64__
26 # if (!defined USE_SSECRC)
27 # define USE_SSECRC
28 # endif
29 #else
30 # undef USE_SSECRC
31 #endif
33 // Killswitch
34 #if NO_SSECRC
35 # undef USE_SSECRC
36 #endif
38 namespace HPHP {
40 bool IsSSEHashSupported();
42 ///////////////////////////////////////////////////////////////////////////////
44 typedef int32_t strhash_t;
45 const strhash_t STRHASH_MASK = 0x7fffffff;
46 const strhash_t STRHASH_MSB = 0x80000000;
48 inline long long hash_int64(long long key) {
49 // "64 bit Mix Functions", from Thomas Wang's "Integer Hash Function."
50 // http://www.concentric.net/~ttwang/tech/inthash.htm
51 key = (~key) + (key << 21); // key = (key << 21) - key - 1;
52 key = key ^ ((unsigned long long)key >> 24);
53 key = (key + (key << 3)) + (key << 8); // key * 265
54 key = key ^ ((unsigned long long)key >> 14);
55 key = (key + (key << 2)) + (key << 4); // key * 21
56 key = key ^ ((unsigned long long)key >> 28);
57 key = key + (key << 31);
58 return key < 0 ? -key : key;
61 inline long long hash_int64_pair(long long k1, long long k2) {
62 // Shift the first key, so (a,b) hashes somewhere other than (b,a)
63 return (hash_int64(k1) << 1) ^ hash_int64(k2);
66 namespace MurmurHash3 {
67 ///////////////////////////////////////////////////////////////////////////////
68 // The following code is based on MurmurHash3:
69 // http://code.google.com/p/smhasher/wiki/MurmurHash3
71 // The case-insensitive version converts lowercase characters to uppercase
72 // under the assumption that character data are 7-bit ASCII. This should work
73 // as identifiers usually only contain alphanumeric characters and the
74 // underscore. Although PHP allows higher ASCII characters (> 127) in an
75 // identifier, they should be very rare, and do not change the correctness.
77 #define ROTL64(x,y) rotl64(x,y)
78 #define BIG_CONSTANT(x) (x##LLU)
80 ALWAYS_INLINE uint64_t rotl64(uint64_t x, int8_t r) {
81 return (x << r) | (x >> (64 - r));
84 template <bool caseSensitive>
85 ALWAYS_INLINE uint64_t getblock64(const uint64_t *p, int i) {
86 uint64_t block = p[i];
87 if (!caseSensitive) {
88 block &= 0xdfdfdfdfdfdfdfdfLLU; // a-z => A-Z
90 return block;
93 template <bool caseSensitive>
94 ALWAYS_INLINE uint8_t getblock8(const uint8_t *p, int i) {
95 uint8_t block = p[i];
96 if (!caseSensitive) {
97 block &= 0xdfU; // a-z => A-Z
99 return block;
102 //-----------------------------------------------------------------------------
103 // Finalization mix - force all bits of a hash block to avalanche
104 ALWAYS_INLINE uint64_t fmix64(uint64_t k) {
105 k ^= k >> 33;
106 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
107 k ^= k >> 33;
108 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
109 k ^= k >> 33;
110 return k;
113 // Optimized for 64-bit architectures. MurmurHash3 also implements a 128-bit
114 // hash that is optimized for 32-bit architectures (omitted here).
115 template <bool caseSensitive>
116 ALWAYS_INLINE void hash128(const void *key, size_t len, uint64_t seed,
117 uint64_t out[2]) {
118 const uint8_t *data = (const uint8_t *)key;
119 const size_t nblocks = len / 16;
120 uint64_t h1 = seed;
121 uint64_t h2 = seed;
122 const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
123 const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
125 //----------
126 // body
127 const uint64_t *blocks = (const uint64_t *)(data);
128 for(size_t i = 0; i < nblocks; i++)
130 uint64_t k1 = getblock64<caseSensitive>(blocks,i*2+0);
131 uint64_t k2 = getblock64<caseSensitive>(blocks,i*2+1);
132 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
133 h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
134 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
135 h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
138 //----------
139 // tail
140 const uint8_t *tail = (const uint8_t*)(data + nblocks*16);
141 uint64_t k1 = 0;
142 uint64_t k2 = 0;
143 switch(len & 15)
145 case 15: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 14)) << 48;
146 case 14: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 13)) << 40;
147 case 13: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 12)) << 32;
148 case 12: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 11)) << 24;
149 case 11: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 10)) << 16;
150 case 10: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 9)) << 8;
151 case 9: k2 ^= uint64_t(getblock8<caseSensitive>(tail, 8)) << 0;
152 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
154 case 8: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 7)) << 56;
155 case 7: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 6)) << 48;
156 case 6: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 5)) << 40;
157 case 5: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 4)) << 32;
158 case 4: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 3)) << 24;
159 case 3: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 2)) << 16;
160 case 2: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 1)) << 8;
161 case 1: k1 ^= uint64_t(getblock8<caseSensitive>(tail, 0)) << 0;
162 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
165 //----------
166 // finalization
167 h1 ^= len; h2 ^= len;
168 h1 += h2;
169 h2 += h1;
170 h1 = fmix64(h1);
171 h2 = fmix64(h2);
172 h1 += h2;
173 h2 += h1;
175 ((uint64_t*)out)[0] = h1;
176 ((uint64_t*)out)[1] = h2;
179 #undef ROTL64
180 #undef BIG_CONSTANT
181 ///////////////////////////////////////////////////////////////////////////////
182 } // namespace MurmurHash3
184 // Four functions for hashing: hash_string_(cs|i)(_unsafe)?.
185 // cs: case-sensitive;
186 // i: case-insensitive;
187 // unsafe: safe for strings aligned at 8-byte boundary;
189 // Fallback versions uses CRC hash when supported, and use MurmurHash otherwise.
190 strhash_t hash_string_cs_fallback(const char *arKey, uint32_t nKeyLength);
191 strhash_t hash_string_i_fallback(const char *arKey, uint32_t nKeyLength);
193 #if FACEBOOK && defined USE_SSECRC
195 strhash_t hash_string_cs_unsafe(const char *arKey, uint32_t nKeyLength);
196 strhash_t hash_string_i_unsafe(const char *arKey, uint32_t nKeyLength);
197 strhash_t hash_string_cs(const char *arKey, uint32_t nKeyLength);
198 strhash_t hash_string_i(const char *arKey, uint32_t nKeyLength);
200 #else
202 inline strhash_t hash_string_cs(const char *arKey, uint32_t nKeyLength) {
203 return hash_string_cs_fallback(arKey, nKeyLength);
206 inline
207 strhash_t hash_string_cs_unsafe(const char *arKey, uint32_t nKeyLength) {
208 return hash_string_cs_fallback(arKey, nKeyLength);
211 inline strhash_t hash_string_i(const char *arKey, uint32_t nKeyLength) {
212 return hash_string_i_fallback(arKey, nKeyLength);
215 inline
216 strhash_t hash_string_i_unsafe(const char *arKey, uint32_t nKeyLength) {
217 return hash_string_i_fallback(arKey, nKeyLength);
220 #endif
222 inline strhash_t hash_string(const char *arKey, uint32_t nKeyLength) {
223 return hash_string_i(arKey, nKeyLength);
226 inline strhash_t hash_string_unsafe(const char *arKey, uint32_t nKeyLength) {
227 return hash_string_i_unsafe(arKey, nKeyLength);
231 * We probably should get rid of this, so to detect code generation errors,
232 * where a binary string is treated as a NULL-terminated literal. Do we ever
233 * allow binary strings as array keys or symbol names?
235 inline strhash_t hash_string(const char *arKey) {
236 return hash_string(arKey, strlen(arKey));
239 inline strhash_t hash_string_i(const char *arKey) {
240 return hash_string_i(arKey, strlen(arKey));
243 // This function returns true and sets the res parameter if arKey
244 // is a non-empty string that matches one of the following conditions:
245 // 1) The string is "0".
246 // 2) The string starts with a non-zero digit, followed by at most
247 // 18 more digits, and is less than or equal to 2^63 - 1.
248 // 3) The string starts with a negative sign, followed by a non-zero
249 // digit, followed by at most 18 more digits, and is greater than
250 // or equal to -2^63.
251 inline bool is_strictly_integer(const char* arKey, size_t nKeyLength,
252 int64_t& res) {
253 if ((unsigned char)(arKey[0] - '-') > ('9' - '-'))
254 return false;
255 if (nKeyLength <= 19 ||
256 (arKey[0] == '-' && nKeyLength == 20)) {
257 unsigned long long num = 0;
258 bool neg = false;
259 unsigned i = 0;
260 if (arKey[0] == '-') {
261 neg = true;
262 i = 1;
263 // The string "-" is NOT strictly an integer
264 if (nKeyLength == 1)
265 return false;
266 // A string that starts with "-0" is NOT strictly an integer
267 if (arKey[1] == '0')
268 return false;
269 } else if (arKey[0] == '0') {
270 // The string "0" is strictly an integer
271 if (nKeyLength == 1) {
272 res = 0;
273 return true;
275 // A string that starts with "0" followed by at least one digit
276 // is NOT strictly an integer
277 return false;
279 bool good = true;
280 for (; i < nKeyLength; ++i) {
281 if (arKey[i] >= '0' && arKey[i] <= '9') {
282 num = 10*num + (arKey[i] - '0');
284 else {
285 good = false;
286 break;
289 if (good) {
290 if (num <= 0x7FFFFFFFFFFFFFFFULL ||
291 (neg && num == 0x8000000000000000ULL)) {
292 res = neg ? 0 - num : (long long)num;
293 return true;
297 return false;
300 class StringData;
301 ///////////////////////////////////////////////////////////////////////////////
304 #ifdef USE_SSECRC
305 // The following functions are implemented in ASM directly for x86_64.
306 extern "C" {
307 HPHP::strhash_t hash_string_cs_crc(const char*, uint32_t);
308 HPHP::strhash_t hash_string_i_crc(const char*, uint32_t);
309 HPHP::strhash_t hash_string_cs_unaligned_crc(const char*, uint32_t);
310 HPHP::strhash_t hash_string_i_unaligned_crc(const char*, uint32_t);
311 HPHP::strhash_t g_hashHelper_crc(const HPHP::StringData*);
313 #endif
315 #endif // incl_HPHP_HASH_H_