1 /* Hashing infrastructure */
11 #include "util/hash.h"
12 #include "util/memory.h"
/* String hash function chooser. */
/* #define MIX_HASH */ /* Far better but slower */
17 #define X31_HASH /* Weaker but faster */
/* We provide common infrastructure for hashing - each hash consists of one
 * fairly large array full of small lists of keys with the same index in the
 * array (same hash value). */
/* Bitmask that reduces a raw hash value to a valid bucket index for a table
 * of hash_size(n) buckets (the table size is a power of two). */
#define hash_mask(n) (hash_size(n) - 1)
27 init_hash(unsigned int width
, hash_func_T func
)
32 assert(width
> 0 && func
);
33 if_assert_failed
return NULL
;
35 /* One is already reserved in struct hash, so use size - 1. */
36 hash
= mem_alloc(sizeof(*hash
) + (hash_size(width
) - 1)
37 * sizeof(struct list_head
));
38 if (!hash
) return NULL
;
43 /* Initialize dummy list_heads */
44 for (; i
< hash_size(width
); i
++)
45 init_list(hash
->hash
[i
]);
51 free_hash(struct hash
*hash
)
56 if_assert_failed
return;
58 for (; i
< hash_size(hash
->width
); i
++)
59 free_list(hash
->hash
[i
]);
/* I have no strong idea about what to set here... I think it does not matter
 * much anyway. ;) --pasky */
/* Arbitrary seed passed as the initval argument to the hash function. */
#define HASH_MAGIC 0xdeadbeef
69 /* Returns hash_item if ok, NULL if error. */
71 add_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
,
75 struct hash_item
*item
= mem_alloc(sizeof(*item
));
77 if (!item
) return NULL
;
79 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
82 item
->keylen
= keylen
;
85 add_to_list(hash
->hash
[hashval
], item
);
91 get_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
)
93 struct list_head
*list
;
94 struct hash_item
*item
;
97 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
98 list
= &hash
->hash
[hashval
];
100 foreach (item
, *list
) {
101 if (keylen
!= item
->keylen
) continue;
102 if (memcmp(key
, item
->key
, keylen
)) continue;
104 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
105 * the list by popularity of its items. Inspired from Links,
106 * probably PerM. --pasky */
107 move_to_top_of_list(*list
, item
);
117 /* If key and/or value were dynamically allocated, think about freeing them.
118 * This function doesn't do that. */
120 del_hash_item(struct hash
*hash
, struct hash_item
*item
)
123 if_assert_failed
return;
132 /* Fast string hashing. */
134 strhash(unsigned char *k
, /* the key */
135 unsigned int length
, /* the length of the key */
136 hash_value_T initval
/* the previous hash, or an arbitrary value */)
138 const unsigned char *p
= (const unsigned char *) k
;
139 hash_value_T h
= initval
;
142 assert(k
&& length
> 0);
143 if_assert_failed
return h
;
145 /* This loop was unrolled, should improve performance on most cpus,
146 * After some tests, 8 seems a good value for most keys. --Zas */
148 h
= (h
<< 5) - h
+ p
[i
];
149 if (++i
== length
) break;
150 h
= (h
<< 5) - h
+ p
[i
];
151 if (++i
== length
) break;
152 h
= (h
<< 5) - h
+ p
[i
];
153 if (++i
== length
) break;
154 h
= (h
<< 5) - h
+ p
[i
];
155 if (++i
== length
) break;
156 h
= (h
<< 5) - h
+ p
[i
];
157 if (++i
== length
) break;
158 h
= (h
<< 5) - h
+ p
[i
];
159 if (++i
== length
) break;
160 h
= (h
<< 5) - h
+ p
[i
];
161 if (++i
== length
) break;
162 h
= (h
<< 5) - h
+ p
[i
];
163 if (++i
== length
) break;
/* String hashing function follows; it is not written by me, somewhere below
 * are credits. I only hacked it a bit. --pasky */

/* This is a big CPU hog, in fact:
 *
 *   %   cumulative    self               self     total
 *  time   seconds   seconds    calls   us/call  us/call  name
 *  6.00     0.35      0.06     10126     5.93     5.93   strhash
 *
 * It should be investigated whether we couldn't push this down a little. */
/*
 * --------------------------------------------------------------------
 * mix -- mix 3 32-bit values reversibly.
 * For every delta with one or two bits set, and the deltas of all three
 * high bits or all three low bits, whether the original value of a,b,c
 * is almost all zero or is uniformly distributed,
 * * If mix() is run forward or backward, at least 32 bits in a,b,c
 *   have at least 1/4 probability of changing.
 * * If mix() is run forward, every bit of c will change between 1/3 and
 *   2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
 * mix() was built out of 36 single-cycle latency instructions in a
 * structure that could support 2x parallelism, like so:
 *      a -= c; x = (c>>13);
 *      b -= a; x = (a<<8);
 *      c -= b; x = (b>>13);
 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
 * of that parallelism. They've also turned some of those single-cycle
 * latency instructions into multi-cycle latency instructions. Still,
 * this is the fastest good hash I could find. There were about 2^^68
 * to choose from. I only looked at a billion or so.
 * --------------------------------------------------------------------
 */
/* mix() -- mix three 32-bit values reversibly (Bob Jenkins).  Note the
 * closing brace: the macro body is a compound statement, so invoke it as
 * mix(a, b, c); at statement position. */
#define mix(a, b, c) { \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12); \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3); \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
}
/*
 * --------------------------------------------------------------------
 * hash() -- hash a variable-length key into a 32-bit value
 *   k       : the key (the unaligned variable-length array of bytes)
 *   len     : the length of the key, counting by bytes
 *   initval : can be any 4-byte value
 * Returns a 32-bit value. Every bit of the key affects every bit of
 * the return value. Every 1-bit and 2-bit delta achieves avalanche.
 * About 6*len+35 instructions.
 *
 * The best hash table sizes are powers of 2. There is no need to do
 * mod a prime (mod is sooo slow!). If you need less than 32 bits,
 * use a bitmask. For example, if you need only 10 bits, do
 *   h = (h & hashmask(10));
 * In which case, the hash table should have hashsize(10) elements.
 *
 * If you are hashing n strings (ub1 **) k, do it like this:
 *   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
 *
 * By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
 * code any way you wish, private, educational, or commercial. It's free.
 *
 * See http://burtleburtle.net/bob/hash/evahash.html
 * Use for hash table lookup, or anything where one collision in 2^^32 is
 * acceptable. Do NOT use for cryptographic purposes.
 * --------------------------------------------------------------------
 */
/* Assemble a 32-bit little-endian word from the 4 key bytes at offset (a);
 * relies on a local `k` (unsigned char *) being in scope at the call site. */
#define keycompute(a) ((k[(a)]) \
		       + ((hash_value_T) (k[(a)+1])<<8) \
		       + ((hash_value_T) (k[(a)+2])<<16) \
		       + ((hash_value_T) (k[(a)+3])<<24))
/* Bob Jenkins' lookup hash (MIX_HASH variant of strhash; see credits above).
 * Guarded so it is only compiled when MIX_HASH is selected instead of the
 * X31 variant.
 * NOTE(review): reconstructed from a mangled extraction; the 12-byte main
 * loop and the final mix()/return were not visible — confirm upstream. */
#ifdef MIX_HASH
hash_value_T
strhash(unsigned char *k,	/* the key */
	unsigned int length,	/* the length of the key */
	hash_value_T initval	/* the previous hash, or an arbitrary value */)
{
	hash_value_T a, b, c;
	unsigned int len = length;

	/* Set up the internal state */
	a = b = 0x9e3779b9;	/* the golden ratio; an arbitrary value */
	c = initval;		/* the previous hash value */

	/*---------------------------------------- handle most of the key */
	while (len >= 12) {
		a += keycompute(0);
		b += keycompute(4);
		c += keycompute(8);
		mix(a, b, c);
		k += 12;
		len -= 12;
	}

	/*------------------------------------- handle the last 11 bytes */
	c += length;
	switch (len) {	/* all the case statements fall through */
		case 11: c += ((hash_value_T) (k[10])<<24);
		case 10: c += ((hash_value_T) (k[9])<<16);
		case 9 : c += ((hash_value_T) (k[8])<<8);
		/* the first byte of c is reserved for the length */
		case 8 : b += ((hash_value_T) (k[7])<<24);
		case 7 : b += ((hash_value_T) (k[6])<<16);
		case 6 : b += ((hash_value_T) (k[5])<<8);
		case 5 : b += (k[4]);
		case 4 : a += ((hash_value_T) (k[3])<<24);
		case 3 : a += ((hash_value_T) (k[2])<<16);
		case 2 : a += ((hash_value_T) (k[1])<<8);
		case 1 : a += (k[0]);
		/* case 0: nothing left to add */
	}

	mix(a, b, c);

	/*-------------------------------------------- report the result */
	return c;
}
#endif /* MIX_HASH */