Add a stupid test script to print CGI variables set by ELinks
[elinks.git] / src / util / hash.c
blob915191d5b32fa184746bfc5cc67e2d4b0b0a3a48
1 /* Hashing infrastructure */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <string.h>
9 #include "elinks.h"
11 #include "util/hash.h"
12 #include "util/memory.h"
15 /* String hashes functions chooser. */
16 /* #define MIX_HASH */ /* Far more better but slower */
17 #define X31_HASH /* Weaker but faster */
20 /* We provide common infrastructure for hashing - each hash consists from one
21 * particularly large array full of small lists of keys with same index in the
22 * array (same hash value). */
24 #define hash_mask(n) (hash_size(n) - 1)
26 struct hash *
27 init_hash(unsigned int width, hash_func_T func)
29 struct hash *hash;
30 unsigned int i = 0;
32 assert(width > 0 && func);
33 if_assert_failed return NULL;
35 /* One is already reserved in struct hash, so use size - 1. */
36 hash = mem_alloc(sizeof(*hash) + (hash_size(width) - 1)
37 * sizeof(struct list_head));
38 if (!hash) return NULL;
40 hash->width = width;
41 hash->func = func;
43 /* Initialize dummy list_heads */
44 for (; i < hash_size(width); i++)
45 init_list(hash->hash[i]);
47 return hash;
50 void
51 free_hash(struct hash *hash)
53 unsigned int i = 0;
55 assert(hash);
56 if_assert_failed return;
58 for (; i < hash_size(hash->width); i++)
59 free_list(hash->hash[i]);
61 mem_free(hash);
65 /* I've no much idea about what to set here.. I think it doesn't matter much
66 * anyway.. ;) --pasky */
67 #define HASH_MAGIC 0xdeadbeef
69 /* Returns hash_item if ok, NULL if error. */
70 struct hash_item *
71 add_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen,
72 void *value)
74 hash_value_T hashval;
75 struct hash_item *item = mem_alloc(sizeof(*item));
77 if (!item) return NULL;
79 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
81 item->key = key;
82 item->keylen = keylen;
83 item->value = value;
85 add_to_list(hash->hash[hashval], item);
87 return item;
90 struct hash_item *
91 get_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen)
93 struct list_head *list;
94 struct hash_item *item;
95 hash_value_T hashval;
97 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
98 list = &hash->hash[hashval];
100 foreach (item, *list) {
101 if (keylen != item->keylen) continue;
102 if (memcmp(key, item->key, keylen)) continue;
104 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
105 * the list by popularity of its items. Inspired from Links,
106 * probably PerM. --pasky */
107 move_to_top_of_list(*list, item);
109 return item;
112 return NULL;
115 #undef HASH_MAGIC
117 /* If key and/or value were dynamically allocated, think about freeing them.
118 * This function doesn't do that. */
119 void
120 del_hash_item(struct hash *hash, struct hash_item *item)
122 assert(item);
123 if_assert_failed return;
125 del_from_list(item);
126 mem_free(item);
130 #ifdef X31_HASH
132 /* Fast string hashing. */
133 hash_value_T
134 strhash(unsigned char *k, /* the key */
135 unsigned int length, /* the length of the key */
136 hash_value_T initval /* the previous hash, or an arbitrary value */)
138 const unsigned char *p = (const unsigned char *) k;
139 hash_value_T h = initval;
140 unsigned int i = 0;
142 assert(k && length > 0);
143 if_assert_failed return h;
145 /* This loop was unrolled, should improve performance on most cpus,
146 * After some tests, 8 seems a good value for most keys. --Zas */
147 for (;;) {
148 h = (h << 5) - h + p[i];
149 if (++i == length) break;
150 h = (h << 5) - h + p[i];
151 if (++i == length) break;
152 h = (h << 5) - h + p[i];
153 if (++i == length) break;
154 h = (h << 5) - h + p[i];
155 if (++i == length) break;
156 h = (h << 5) - h + p[i];
157 if (++i == length) break;
158 h = (h << 5) - h + p[i];
159 if (++i == length) break;
160 h = (h << 5) - h + p[i];
161 if (++i == length) break;
162 h = (h << 5) - h + p[i];
163 if (++i == length) break;
166 return h;
169 #else /* X31_HASH */
171 /* String hashing function follows; it is not written by me, somewhere below
172 * are credits. I only hacked it a bit. --pasky */
174 /* This is a big CPU hog, in fact:
176 * % cumulative self self total-----------
177 * time seconds seconds calls us/call us/call name----
178 * 6.00 0.35 0.06 10126 5.93 5.93 strhash
180 * It should be investigated whether we couldn't push this down a little. */
183 * --------------------------------------------------------------------
184 * mix -- mix 3 32-bit values reversibly.
185 * For every delta with one or two bits set, and the deltas of all three
186 * high bits or all three low bits, whether the original value of a,b,c
187 * is almost all zero or is uniformly distributed,
188 * * If mix() is run forward or backward, at least 32 bits in a,b,c
189 * have at least 1/4 probability of changing.
190 * * If mix() is run forward, every bit of c will change between 1/3 and
191 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
192 * mix() was built out of 36 single-cycle latency instructions in a
193 * structure that could supported 2x parallelism, like so:
194 * a -= b;
195 * a -= c; x = (c>>13);
196 * b -= c; a ^= x;
197 * b -= a; x = (a<<8);
198 * c -= a; b ^= x;
199 * c -= b; x = (b>>13);
200 * ...
201 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
202 * of that parallelism. They've also turned some of those single-cycle
203 * latency instructions into multi-cycle latency instructions. Still,
204 * this is the fastest good hash I could find. There were about 2^^68
205 * to choose from. I only looked at a billion or so.
206 * --------------------------------------------------------------------
209 #define mix(a, b, c) { \
210 a -= b; a -= c; a ^= (c>>13); \
211 b -= c; b -= a; b ^= (a<<8); \
212 c -= a; c -= b; c ^= (b>>13); \
213 a -= b; a -= c; a ^= (c>>12); \
214 b -= c; b -= a; b ^= (a<<16); \
215 c -= a; c -= b; c ^= (b>>5); \
216 a -= b; a -= c; a ^= (c>>3); \
217 b -= c; b -= a; b ^= (a<<10); \
218 c -= a; c -= b; c ^= (b>>15); \
222 --------------------------------------------------------------------
223 hash() -- hash a variable-length key into a 32-bit value
224 k : the key (the unaligned variable-length array of bytes)
225 len : the length of the key, counting by bytes
226 initval : can be any 4-byte value
227 Returns a 32-bit value. Every bit of the key affects every bit of
228 the return value. Every 1-bit and 2-bit delta achieves avalanche.
229 About 6*len+35 instructions.
231 The best hash table sizes are powers of 2. There is no need to do
232 mod a prime (mod is sooo slow!). If you need less than 32 bits,
233 use a bitmask. For example, if you need only 10 bits, do
234 h = (h & hashmask(10));
235 In which case, the hash table should have hashsize(10) elements.
237 If you are hashing n strings (ub1 **) k, do it like this:
238 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
240 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
241 code any way you wish, private, educational, or commercial. It's free.
243 See http://burtleburtle.net/bob/hash/evahash.html
244 Use for hash table lookup, or anything where one collision in 2^^32 is
245 acceptable. Do NOT use for cryptographic purposes.
246 --------------------------------------------------------------------
249 #define keycompute(a) ((k[(a)]) \
250 + ((hash_value_T) (k[(a)+1])<<8) \
251 + ((hash_value_T) (k[(a)+2])<<16) \
252 + ((hash_value_T) (k[(a)+3])<<24))
254 hash_value_T
255 strhash(unsigned char *k, /* the key */
256 unsigned int length, /* the length of the key */
257 hash_value_T initval /* the previous hash, or an arbitrary value */)
259 int len;
260 hash_value_T a, b, c;
262 /* Set up the internal state */
263 len = length;
264 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
265 c = initval; /* the previous hash value */
267 /*---------------------------------------- handle most of the key */
268 while (len >= 12) {
269 a += keycompute(0);
270 b += keycompute(4);
271 c += keycompute(8);
272 mix(a, b, c);
273 k += 12;
274 len -= 12;
277 /*------------------------------------- handle the last 11 bytes */
278 c += length;
279 switch (len) { /* all the case statements fall through */
280 case 11: c += ((hash_value_T) (k[10])<<24);
281 case 10: c += ((hash_value_T) (k[9])<<16);
282 case 9 : c += ((hash_value_T) (k[8])<<8);
283 /* the first byte of c is reserved for the length */
284 case 8 : b += ((hash_value_T) (k[7])<<24);
285 case 7 : b += ((hash_value_T) (k[6])<<16);
286 case 6 : b += ((hash_value_T) (k[5])<<8);
287 case 5 : b += (k[4]);
288 case 4 : a += ((hash_value_T) (k[3])<<24);
289 case 3 : a += ((hash_value_T) (k[2])<<16);
290 case 2 : a += ((hash_value_T) (k[1])<<8);
291 case 1 : a += (k[0]);
292 /* case 0: nothing left to add */
295 mix(a, b, c);
297 /*-------------------------------------------- report the result */
298 return c;
301 #undef keycompute
302 #undef mix
303 #undef hash_mask
305 #endif /* MIX_HASH */