Retry only for https protocol
[elinks.git] / src / util / hash.c
bloba52f87e5ca263aec3172688aecf2774766d4c3e9
1 /** Hashing infrastructure
2 * @file */
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
8 #include <string.h>
10 #include "elinks.h"
12 #include "util/hash.h"
13 #include "util/memory.h"
16 /* String hashes functions chooser. */
17 /* #define MIX_HASH */ /* Far more better but slower */
18 #define X31_HASH /* Weaker but faster */
21 /* We provide common infrastructure for hashing - each hash consists from one
22 * particularly large array full of small lists of keys with same index in the
23 * array (same hash value). */
25 #define hash_mask(n) (hash_size(n) - 1)
26 #define hash_size(n) (1 << (n))
28 static hash_value_T strhash(unsigned char *k, unsigned int length, hash_value_T initval);
30 static inline struct hash *
31 init_hash(unsigned int width, hash_func_T func)
33 struct hash *hash;
34 unsigned int i = 0;
36 assert(width > 0 && func);
37 if_assert_failed return NULL;
39 /* One is already reserved in struct hash, so use size - 1. */
40 hash = mem_alloc(sizeof(*hash) + (hash_size(width) - 1)
41 * sizeof(struct list_head));
42 if (!hash) return NULL;
44 hash->width = width;
45 hash->func = func;
47 /* Initialize dummy list_heads */
48 for (; i < hash_size(width); i++)
49 init_list(hash->hash[i]);
51 return hash;
54 /** @relates hash */
55 struct hash *
56 init_hash8(void)
58 return init_hash(8, &strhash);
61 /** @relates hash */
62 void
63 free_hash(struct hash **hashp)
65 unsigned int i = 0;
67 assert(hashp && *hashp);
68 if_assert_failed return;
70 for (; i < hash_size((*hashp)->width); i++)
71 free_list((*hashp)->hash[i]);
73 mem_free_set(hashp, NULL);
77 /** Initialization vector for the hash function.
78 * I've no much idea about what to set here.. I think it doesn't matter much
79 * anyway.. ;) --pasky */
80 #define HASH_MAGIC 0xdeadbeef
82 /** @returns hash_item if ok, NULL if error.
83 * @relates hash */
84 struct hash_item *
85 add_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen,
86 void *value)
88 hash_value_T hashval;
89 struct hash_item *item = mem_alloc(sizeof(*item));
91 if (!item) return NULL;
93 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
95 item->key = key;
96 item->keylen = keylen;
97 item->value = value;
99 add_to_list(hash->hash[hashval], item);
101 return item;
104 /** @relates hash */
105 struct hash_item *
106 get_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen)
108 struct list_head *list;
109 struct hash_item *item;
110 hash_value_T hashval;
112 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
113 list = &hash->hash[hashval];
115 foreach (item, *list) {
116 if (keylen != item->keylen) continue;
117 if (memcmp(key, item->key, keylen)) continue;
119 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
120 * the list by popularity of its items. Inspired from Links,
121 * probably PerM. --pasky */
122 move_to_top_of_list(*list, item);
124 return item;
127 return NULL;
130 #undef HASH_MAGIC
132 /** Delete @a item from @a hash.
133 * If key and/or value were dynamically allocated, think about freeing them.
134 * This function doesn't do that.
135 * @relates hash */
136 void
137 del_hash_item(struct hash *hash, struct hash_item *item)
139 assert(item);
140 if_assert_failed return;
142 del_from_list(item);
143 mem_free(item);
147 #ifdef X31_HASH
149 /** Fast string hashing.
150 * @param k the key
151 * @param length the length of the key
152 * @param initval the previous hash, or an arbitrary value */
153 static hash_value_T
154 strhash(unsigned char *k,
155 unsigned int length,
156 hash_value_T initval)
158 const unsigned char *p = (const unsigned char *) k;
159 hash_value_T h = initval;
160 unsigned int i = 0;
162 assert(k && length > 0);
163 if_assert_failed return h;
165 /* This loop was unrolled, should improve performance on most cpus,
166 * After some tests, 8 seems a good value for most keys. --Zas */
167 for (;;) {
168 h = (h << 5) - h + p[i];
169 if (++i == length) break;
170 h = (h << 5) - h + p[i];
171 if (++i == length) break;
172 h = (h << 5) - h + p[i];
173 if (++i == length) break;
174 h = (h << 5) - h + p[i];
175 if (++i == length) break;
176 h = (h << 5) - h + p[i];
177 if (++i == length) break;
178 h = (h << 5) - h + p[i];
179 if (++i == length) break;
180 h = (h << 5) - h + p[i];
181 if (++i == length) break;
182 h = (h << 5) - h + p[i];
183 if (++i == length) break;
186 return h;
189 #else /* X31_HASH */
191 /* String hashing function follows; it is not written by me, somewhere below
192 * are credits. I only hacked it a bit. --pasky */
194 /* This is a big CPU hog, in fact:
196 * % cumulative self self total-----------
197 * time seconds seconds calls us/call us/call name----
198 * 6.00 0.35 0.06 10126 5.93 5.93 strhash
200 * It should be investigated whether we couldn't push this down a little. */
203 * --------------------------------------------------------------------
204 * mix -- mix 3 32-bit values reversibly.
205 * For every delta with one or two bits set, and the deltas of all three
206 * high bits or all three low bits, whether the original value of a,b,c
207 * is almost all zero or is uniformly distributed,
208 * * If mix() is run forward or backward, at least 32 bits in a,b,c
209 * have at least 1/4 probability of changing.
210 * * If mix() is run forward, every bit of c will change between 1/3 and
211 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
212 * mix() was built out of 36 single-cycle latency instructions in a
213 * structure that could supported 2x parallelism, like so:
214 * a -= b;
215 * a -= c; x = (c>>13);
216 * b -= c; a ^= x;
217 * b -= a; x = (a<<8);
218 * c -= a; b ^= x;
219 * c -= b; x = (b>>13);
220 * ...
221 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
222 * of that parallelism. They've also turned some of those single-cycle
223 * latency instructions into multi-cycle latency instructions. Still,
224 * this is the fastest good hash I could find. There were about 2^^68
225 * to choose from. I only looked at a billion or so.
226 * --------------------------------------------------------------------
229 #define mix(a, b, c) { \
230 a -= b; a -= c; a ^= (c>>13); \
231 b -= c; b -= a; b ^= (a<<8); \
232 c -= a; c -= b; c ^= (b>>13); \
233 a -= b; a -= c; a ^= (c>>12); \
234 b -= c; b -= a; b ^= (a<<16); \
235 c -= a; c -= b; c ^= (b>>5); \
236 a -= b; a -= c; a ^= (c>>3); \
237 b -= c; b -= a; b ^= (a<<10); \
238 c -= a; c -= b; c ^= (b>>15); \
242 --------------------------------------------------------------------
243 hash() -- hash a variable-length key into a 32-bit value
244 k : the key (the unaligned variable-length array of bytes)
245 len : the length of the key, counting by bytes
246 initval : can be any 4-byte value
247 Returns a 32-bit value. Every bit of the key affects every bit of
248 the return value. Every 1-bit and 2-bit delta achieves avalanche.
249 About 6*len+35 instructions.
251 The best hash table sizes are powers of 2. There is no need to do
252 mod a prime (mod is sooo slow!). If you need less than 32 bits,
253 use a bitmask. For example, if you need only 10 bits, do
254 h = (h & hashmask(10));
255 In which case, the hash table should have hashsize(10) elements.
257 If you are hashing n strings (ub1 **) k, do it like this:
258 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
260 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
261 code any way you wish, private, educational, or commercial. It's free.
263 See http://burtleburtle.net/bob/hash/evahash.html
264 Use for hash table lookup, or anything where one collision in 2^^32 is
265 acceptable. Do NOT use for cryptographic purposes.
266 --------------------------------------------------------------------
269 #define keycompute(a) ((k[(a)]) \
270 + ((hash_value_T) (k[(a)+1])<<8) \
271 + ((hash_value_T) (k[(a)+2])<<16) \
272 + ((hash_value_T) (k[(a)+3])<<24))
274 /** Hash an array of bytes.
275 * @param k the key
276 * @param length the length of the key
277 * @param initval the previous hash, or an arbitrary value */
278 static hash_value_T
279 strhash(unsigned char *k,
280 unsigned int length,
281 hash_value_T initval)
283 int len;
284 hash_value_T a, b, c;
286 /* Set up the internal state */
287 len = length;
288 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
289 c = initval; /* the previous hash value */
291 /*---------------------------------------- handle most of the key */
292 while (len >= 12) {
293 a += keycompute(0);
294 b += keycompute(4);
295 c += keycompute(8);
296 mix(a, b, c);
297 k += 12;
298 len -= 12;
301 /*------------------------------------- handle the last 11 bytes */
302 c += length;
303 switch (len) { /* all the case statements fall through */
304 case 11: c += ((hash_value_T) (k[10])<<24);
305 case 10: c += ((hash_value_T) (k[9])<<16);
306 case 9 : c += ((hash_value_T) (k[8])<<8);
307 /* the first byte of c is reserved for the length */
308 case 8 : b += ((hash_value_T) (k[7])<<24);
309 case 7 : b += ((hash_value_T) (k[6])<<16);
310 case 6 : b += ((hash_value_T) (k[5])<<8);
311 case 5 : b += (k[4]);
312 case 4 : a += ((hash_value_T) (k[3])<<24);
313 case 3 : a += ((hash_value_T) (k[2])<<16);
314 case 2 : a += ((hash_value_T) (k[1])<<8);
315 case 1 : a += (k[0]);
316 /* case 0: nothing left to add */
319 mix(a, b, c);
321 /*-------------------------------------------- report the result */
322 return c;
325 #undef keycompute
326 #undef mix
327 #undef hash_mask
329 #endif /* MIX_HASH */