/*
 * lookup3.c -- imported into fast-recs-collate
 * (gitweb snapshot: commit "add BSD license",
 *  blob d968a920fc11620435b2ae3d174cd34846ee9337)
 */
/*
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  Routines to test the hash are included
if SELF_TEST is defined.  You can use this free for any purpose.  It's in
the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
little-endian machines.  Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1;  b = i2;  c = i3;
  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hashword().  If you have a byte array (like
a character string), use hashlittle().  If you have several byte arrays, or
a mix of things, see the comments above hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/
#include <stddef.h>     /* size_t */
#include <stdint.h>     /* defines uint32_t etc */
#include <sys/param.h>  /* attempt to define endianness */
#ifdef linux
# include <endian.h>    /* attempt to define endianness */
#endif

/*
 * When VALGRIND is defined, the tail of an aligned key is read one byte
 * at a time instead of as a masked whole word, so memory-checking tools
 * do not report a (harmless) read past the end of the buffer.
 */
#define VALGRIND

/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.
 */
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif

#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
    4  6  8 16 19  4
    9 15  3 18 27 15
   14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

These constants passed:
 14 11 25 16 4 14 24
 12 14 25 16 4 14 24
and these came close:
  4  8 15 26 3 22 24
 10  8 15 26 3 22 24
 11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/
uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;                          /* internal state */
  union { const void *ptr; size_t i; } u;  /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;  /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;  /* zero length strings require no mixing */
    }

#else /* make valgrind happy */
    {
      const uint8_t *k8 = (const uint8_t *)k;
      switch(length)
      {
      case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
      case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
      case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
      case 9 : c+=k8[8];                   /* fall through */
      case 8 : b+=k[1]; a+=k[0]; break;
      case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
      case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
      case 5 : b+=k8[4];                   /* fall through */
      case 4 : a+=k[0]; break;
      case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
      case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
      case 1 : a+=k8[0]; break;
      case 0 : return c;
      }
    }
#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;  /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;  /* zero length requires no mixing */
    }

  } else {                      /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)  /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;  /* fall through */
    case 11: c+=((uint32_t)k[10])<<16;  /* fall through */
    case 10: c+=((uint32_t)k[9])<<8;    /* fall through */
    case 9 : c+=k[8];                   /* fall through */
    case 8 : b+=((uint32_t)k[7])<<24;   /* fall through */
    case 7 : b+=((uint32_t)k[6])<<16;   /* fall through */
    case 6 : b+=((uint32_t)k[5])<<8;    /* fall through */
    case 5 : b+=k[4];                   /* fall through */
    case 4 : a+=((uint32_t)k[3])<<24;   /* fall through */
    case 3 : a+=((uint32_t)k[2])<<16;   /* fall through */
    case 2 : a+=((uint32_t)k[1])<<8;    /* fall through */
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}