1 /* Hashing infrastructure */
11 #include "util/hash.h"
12 #include "util/memory.h"
15 /* String hashes functions chooser. */
16 /* #define MIX_HASH */ /* Far more better but slower */
17 #define X31_HASH /* Weaker but faster */
20 /* We provide common infrastructure for hashing - each hash consists from one
21 * particularly large array full of small lists of keys with same index in the
22 * array (same hash value). */
24 #define hash_mask(n) (hash_size(n) - 1)
25 #define hash_size(n) (1 << (n))
27 static hash_value_T
strhash(unsigned char *k
, unsigned int length
, hash_value_T initval
);
29 static inline struct hash
*
30 init_hash(unsigned int width
, hash_func_T func
)
35 assert(width
> 0 && func
);
36 if_assert_failed
return NULL
;
38 /* One is already reserved in struct hash, so use size - 1. */
39 hash
= mem_alloc(sizeof(*hash
) + (hash_size(width
) - 1)
40 * sizeof(struct list_head
));
41 if (!hash
) return NULL
;
46 /* Initialize dummy list_heads */
47 for (; i
< hash_size(width
); i
++)
48 init_list(hash
->hash
[i
]);
56 return init_hash(8, &strhash
);
60 free_hash(struct hash
**hashp
)
64 assert(hashp
&& *hashp
);
65 if_assert_failed
return;
67 for (; i
< hash_size((*hashp
)->width
); i
++)
68 free_list((*hashp
)->hash
[i
]);
70 mem_free_set(hashp
, NULL
);
74 /* I've no much idea about what to set here.. I think it doesn't matter much
75 * anyway.. ;) --pasky */
76 #define HASH_MAGIC 0xdeadbeef
78 /* Returns hash_item if ok, NULL if error. */
80 add_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
,
84 struct hash_item
*item
= mem_alloc(sizeof(*item
));
86 if (!item
) return NULL
;
88 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
91 item
->keylen
= keylen
;
94 add_to_list(hash
->hash
[hashval
], item
);
100 get_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
)
102 struct list_head
*list
;
103 struct hash_item
*item
;
104 hash_value_T hashval
;
106 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
107 list
= &hash
->hash
[hashval
];
109 foreach (item
, *list
) {
110 if (keylen
!= item
->keylen
) continue;
111 if (memcmp(key
, item
->key
, keylen
)) continue;
113 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
114 * the list by popularity of its items. Inspired from Links,
115 * probably PerM. --pasky */
116 move_to_top_of_list(*list
, item
);
126 /* If key and/or value were dynamically allocated, think about freeing them.
127 * This function doesn't do that. */
129 del_hash_item(struct hash
*hash
, struct hash_item
*item
)
132 if_assert_failed
return;
141 /* Fast string hashing. */
143 strhash(unsigned char *k
, /* the key */
144 unsigned int length
, /* the length of the key */
145 hash_value_T initval
/* the previous hash, or an arbitrary value */)
147 const unsigned char *p
= (const unsigned char *) k
;
148 hash_value_T h
= initval
;
151 assert(k
&& length
> 0);
152 if_assert_failed
return h
;
154 /* This loop was unrolled, should improve performance on most cpus,
155 * After some tests, 8 seems a good value for most keys. --Zas */
157 h
= (h
<< 5) - h
+ p
[i
];
158 if (++i
== length
) break;
159 h
= (h
<< 5) - h
+ p
[i
];
160 if (++i
== length
) break;
161 h
= (h
<< 5) - h
+ p
[i
];
162 if (++i
== length
) break;
163 h
= (h
<< 5) - h
+ p
[i
];
164 if (++i
== length
) break;
165 h
= (h
<< 5) - h
+ p
[i
];
166 if (++i
== length
) break;
167 h
= (h
<< 5) - h
+ p
[i
];
168 if (++i
== length
) break;
169 h
= (h
<< 5) - h
+ p
[i
];
170 if (++i
== length
) break;
171 h
= (h
<< 5) - h
+ p
[i
];
172 if (++i
== length
) break;
180 /* String hashing function follows; it is not written by me, somewhere below
181 * are credits. I only hacked it a bit. --pasky */
183 /* This is a big CPU hog, in fact:
185 * % cumulative self self total-----------
186 * time seconds seconds calls us/call us/call name----
187 * 6.00 0.35 0.06 10126 5.93 5.93 strhash
189 * It should be investigated whether we couldn't push this down a little. */
192 * --------------------------------------------------------------------
193 * mix -- mix 3 32-bit values reversibly.
194 * For every delta with one or two bits set, and the deltas of all three
195 * high bits or all three low bits, whether the original value of a,b,c
196 * is almost all zero or is uniformly distributed,
197 * * If mix() is run forward or backward, at least 32 bits in a,b,c
198 * have at least 1/4 probability of changing.
199 * * If mix() is run forward, every bit of c will change between 1/3 and
200 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
201 * mix() was built out of 36 single-cycle latency instructions in a
202 * structure that could supported 2x parallelism, like so:
204 * a -= c; x = (c>>13);
206 * b -= a; x = (a<<8);
208 * c -= b; x = (b>>13);
210 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
211 * of that parallelism. They've also turned some of those single-cycle
212 * latency instructions into multi-cycle latency instructions. Still,
213 * this is the fastest good hash I could find. There were about 2^^68
214 * to choose from. I only looked at a billion or so.
215 * --------------------------------------------------------------------
218 #define mix(a, b, c) { \
219 a -= b; a -= c; a ^= (c>>13); \
220 b -= c; b -= a; b ^= (a<<8); \
221 c -= a; c -= b; c ^= (b>>13); \
222 a -= b; a -= c; a ^= (c>>12); \
223 b -= c; b -= a; b ^= (a<<16); \
224 c -= a; c -= b; c ^= (b>>5); \
225 a -= b; a -= c; a ^= (c>>3); \
226 b -= c; b -= a; b ^= (a<<10); \
227 c -= a; c -= b; c ^= (b>>15); \
231 --------------------------------------------------------------------
232 hash() -- hash a variable-length key into a 32-bit value
233 k : the key (the unaligned variable-length array of bytes)
234 len : the length of the key, counting by bytes
235 initval : can be any 4-byte value
236 Returns a 32-bit value. Every bit of the key affects every bit of
237 the return value. Every 1-bit and 2-bit delta achieves avalanche.
238 About 6*len+35 instructions.
240 The best hash table sizes are powers of 2. There is no need to do
241 mod a prime (mod is sooo slow!). If you need less than 32 bits,
242 use a bitmask. For example, if you need only 10 bits, do
243 h = (h & hashmask(10));
244 In which case, the hash table should have hashsize(10) elements.
246 If you are hashing n strings (ub1 **) k, do it like this:
247 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
249 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
250 code any way you wish, private, educational, or commercial. It's free.
252 See http://burtleburtle.net/bob/hash/evahash.html
253 Use for hash table lookup, or anything where one collision in 2^^32 is
254 acceptable. Do NOT use for cryptographic purposes.
255 --------------------------------------------------------------------
258 #define keycompute(a) ((k[(a)]) \
259 + ((hash_value_T) (k[(a)+1])<<8) \
260 + ((hash_value_T) (k[(a)+2])<<16) \
261 + ((hash_value_T) (k[(a)+3])<<24))
264 strhash(unsigned char *k
, /* the key */
265 unsigned int length
, /* the length of the key */
266 hash_value_T initval
/* the previous hash, or an arbitrary value */)
269 hash_value_T a
, b
, c
;
271 /* Set up the internal state */
273 a
= b
= 0x9e3779b9; /* the golden ratio; an arbitrary value */
274 c
= initval
; /* the previous hash value */
276 /*---------------------------------------- handle most of the key */
286 /*------------------------------------- handle the last 11 bytes */
288 switch (len
) { /* all the case statements fall through */
289 case 11: c
+= ((hash_value_T
) (k
[10])<<24);
290 case 10: c
+= ((hash_value_T
) (k
[9])<<16);
291 case 9 : c
+= ((hash_value_T
) (k
[8])<<8);
292 /* the first byte of c is reserved for the length */
293 case 8 : b
+= ((hash_value_T
) (k
[7])<<24);
294 case 7 : b
+= ((hash_value_T
) (k
[6])<<16);
295 case 6 : b
+= ((hash_value_T
) (k
[5])<<8);
296 case 5 : b
+= (k
[4]);
297 case 4 : a
+= ((hash_value_T
) (k
[3])<<24);
298 case 3 : a
+= ((hash_value_T
) (k
[2])<<16);
299 case 2 : a
+= ((hash_value_T
) (k
[1])<<8);
300 case 1 : a
+= (k
[0]);
301 /* case 0: nothing left to add */
306 /*-------------------------------------------- report the result */
314 #endif /* MIX_HASH */