/* src/util/hash.c — ELinks hashing infrastructure
 * (blob 6d0036ad91ccf59c0151826add5efea507c47815) */
1 /* Hashing infrastructure */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <string.h>
9 #include "elinks.h"
11 #include "util/hash.h"
12 #include "util/memory.h"
15 /* String hashes functions chooser. */
/* #define MIX_HASH */ /* Much stronger, but slower */
17 #define X31_HASH /* Weaker but faster */
/* We provide common infrastructure for hashing - each hash consists from one
 * particularly large array full of small lists of keys with same index in the
 * array (same hash value). */

/* hash_size(n) is the number of buckets in a table @n bits wide; since that
 * is always a power of two, hash_mask(n) maps any hash value onto a valid
 * bucket index with a cheap bitwise AND instead of a modulo. */
#define hash_mask(n) (hash_size(n) - 1)
#define hash_size(n) (1 << (n))
27 static hash_value_T strhash(unsigned char *k, unsigned int length, hash_value_T initval);
29 static inline struct hash *
30 init_hash(unsigned int width, hash_func_T func)
32 struct hash *hash;
33 unsigned int i = 0;
35 assert(width > 0 && func);
36 if_assert_failed return NULL;
38 /* One is already reserved in struct hash, so use size - 1. */
39 hash = mem_alloc(sizeof(*hash) + (hash_size(width) - 1)
40 * sizeof(struct list_head));
41 if (!hash) return NULL;
43 hash->width = width;
44 hash->func = func;
46 /* Initialize dummy list_heads */
47 for (; i < hash_size(width); i++)
48 init_list(hash->hash[i]);
50 return hash;
53 struct hash *
54 init_hash8(void)
56 return init_hash(8, &strhash);
59 void
60 free_hash(struct hash **hashp)
62 unsigned int i = 0;
64 assert(hashp && *hashp);
65 if_assert_failed return;
67 for (; i < hash_size((*hashp)->width); i++)
68 free_list((*hashp)->hash[i]);
70 mem_free_set(hashp, NULL);
/* I don't have much of an idea what to set here... I don't think it matters
 * much anyway. ;) --pasky */
76 #define HASH_MAGIC 0xdeadbeef
78 /* Returns hash_item if ok, NULL if error. */
79 struct hash_item *
80 add_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen,
81 void *value)
83 hash_value_T hashval;
84 struct hash_item *item = mem_alloc(sizeof(*item));
86 if (!item) return NULL;
88 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
90 item->key = key;
91 item->keylen = keylen;
92 item->value = value;
94 add_to_list(hash->hash[hashval], item);
96 return item;
99 struct hash_item *
100 get_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen)
102 struct list_head *list;
103 struct hash_item *item;
104 hash_value_T hashval;
106 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
107 list = &hash->hash[hashval];
109 foreach (item, *list) {
110 if (keylen != item->keylen) continue;
111 if (memcmp(key, item->key, keylen)) continue;
113 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
114 * the list by popularity of its items. Inspired from Links,
115 * probably PerM. --pasky */
116 move_to_top_of_list(*list, item);
118 return item;
121 return NULL;
124 #undef HASH_MAGIC
126 /* If key and/or value were dynamically allocated, think about freeing them.
127 * This function doesn't do that. */
128 void
129 del_hash_item(struct hash *hash, struct hash_item *item)
131 assert(item);
132 if_assert_failed return;
134 del_from_list(item);
135 mem_free(item);
139 #ifdef X31_HASH
141 /* Fast string hashing. */
142 static hash_value_T
143 strhash(unsigned char *k, /* the key */
144 unsigned int length, /* the length of the key */
145 hash_value_T initval /* the previous hash, or an arbitrary value */)
147 const unsigned char *p = (const unsigned char *) k;
148 hash_value_T h = initval;
149 unsigned int i = 0;
151 assert(k && length > 0);
152 if_assert_failed return h;
154 /* This loop was unrolled, should improve performance on most cpus,
155 * After some tests, 8 seems a good value for most keys. --Zas */
156 for (;;) {
157 h = (h << 5) - h + p[i];
158 if (++i == length) break;
159 h = (h << 5) - h + p[i];
160 if (++i == length) break;
161 h = (h << 5) - h + p[i];
162 if (++i == length) break;
163 h = (h << 5) - h + p[i];
164 if (++i == length) break;
165 h = (h << 5) - h + p[i];
166 if (++i == length) break;
167 h = (h << 5) - h + p[i];
168 if (++i == length) break;
169 h = (h << 5) - h + p[i];
170 if (++i == length) break;
171 h = (h << 5) - h + p[i];
172 if (++i == length) break;
175 return h;
178 #else /* X31_HASH */
180 /* String hashing function follows; it is not written by me, somewhere below
181 * are credits. I only hacked it a bit. --pasky */
183 /* This is a big CPU hog, in fact:
185 * % cumulative self self total-----------
186 * time seconds seconds calls us/call us/call name----
187 * 6.00 0.35 0.06 10126 5.93 5.93 strhash
189 * It should be investigated whether we couldn't push this down a little. */
/*
 * --------------------------------------------------------------------
193 * mix -- mix 3 32-bit values reversibly.
194 * For every delta with one or two bits set, and the deltas of all three
195 * high bits or all three low bits, whether the original value of a,b,c
196 * is almost all zero or is uniformly distributed,
197 * * If mix() is run forward or backward, at least 32 bits in a,b,c
198 * have at least 1/4 probability of changing.
199 * * If mix() is run forward, every bit of c will change between 1/3 and
200 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
201 * mix() was built out of 36 single-cycle latency instructions in a
202 * structure that could supported 2x parallelism, like so:
203 * a -= b;
204 * a -= c; x = (c>>13);
205 * b -= c; a ^= x;
206 * b -= a; x = (a<<8);
207 * c -= a; b ^= x;
208 * c -= b; x = (b>>13);
209 * ...
210 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
211 * of that parallelism. They've also turned some of those single-cycle
212 * latency instructions into multi-cycle latency instructions. Still,
213 * this is the fastest good hash I could find. There were about 2^^68
214 * to choose from. I only looked at a billion or so.
 * --------------------------------------------------------------------
 */
/* Bob Jenkins' mix(): reversibly mingle three 32-bit values @a, @b, @c
 * in place.  Wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe after an unbraced if, and requires the
 * trailing semicolon). */
#define mix(a, b, c) do { \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12); \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3); \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
} while (0)
/*
--------------------------------------------------------------------
232 hash() -- hash a variable-length key into a 32-bit value
233 k : the key (the unaligned variable-length array of bytes)
234 len : the length of the key, counting by bytes
235 initval : can be any 4-byte value
236 Returns a 32-bit value. Every bit of the key affects every bit of
237 the return value. Every 1-bit and 2-bit delta achieves avalanche.
238 About 6*len+35 instructions.
240 The best hash table sizes are powers of 2. There is no need to do
241 mod a prime (mod is sooo slow!). If you need less than 32 bits,
242 use a bitmask. For example, if you need only 10 bits, do
243 h = (h & hashmask(10));
244 In which case, the hash table should have hashsize(10) elements.
246 If you are hashing n strings (ub1 **) k, do it like this:
247 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
249 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
250 code any way you wish, private, educational, or commercial. It's free.
252 See http://burtleburtle.net/bob/hash/evahash.html
253 Use for hash table lookup, or anything where one collision in 2^^32 is
254 acceptable. Do NOT use for cryptographic purposes.
--------------------------------------------------------------------
*/
/* Assemble four consecutive key bytes starting at offset (a) into one
 * 32-bit little-endian word.  Relies on a variable `k' (unsigned char *)
 * being in scope at the expansion site, with at least (a)+4 readable
 * bytes. */
#define keycompute(a) ((k[(a)]) \
		+ ((hash_value_T) (k[(a)+1])<<8) \
		+ ((hash_value_T) (k[(a)+2])<<16) \
		+ ((hash_value_T) (k[(a)+3])<<24))
263 static hash_value_T
264 strhash(unsigned char *k, /* the key */
265 unsigned int length, /* the length of the key */
266 hash_value_T initval /* the previous hash, or an arbitrary value */)
268 int len;
269 hash_value_T a, b, c;
271 /* Set up the internal state */
272 len = length;
273 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
274 c = initval; /* the previous hash value */
276 /*---------------------------------------- handle most of the key */
277 while (len >= 12) {
278 a += keycompute(0);
279 b += keycompute(4);
280 c += keycompute(8);
281 mix(a, b, c);
282 k += 12;
283 len -= 12;
286 /*------------------------------------- handle the last 11 bytes */
287 c += length;
288 switch (len) { /* all the case statements fall through */
289 case 11: c += ((hash_value_T) (k[10])<<24);
290 case 10: c += ((hash_value_T) (k[9])<<16);
291 case 9 : c += ((hash_value_T) (k[8])<<8);
292 /* the first byte of c is reserved for the length */
293 case 8 : b += ((hash_value_T) (k[7])<<24);
294 case 7 : b += ((hash_value_T) (k[6])<<16);
295 case 6 : b += ((hash_value_T) (k[5])<<8);
296 case 5 : b += (k[4]);
297 case 4 : a += ((hash_value_T) (k[3])<<24);
298 case 3 : a += ((hash_value_T) (k[2])<<16);
299 case 2 : a += ((hash_value_T) (k[1])<<8);
300 case 1 : a += (k[0]);
301 /* case 0: nothing left to add */
304 mix(a, b, c);
306 /*-------------------------------------------- report the result */
307 return c;
310 #undef keycompute
311 #undef mix
312 #undef hash_mask
#endif /* X31_HASH */