/* src/util/hash.c — ELinks hashing infrastructure
 * (blob 6d0036ad91ccf59c0151826add5efea507c47815) */
1 /* Hashing infrastructure */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <string.h>
9 #include "elinks.h"
11 #include "util/hash.h"
12 #include "util/memory.h"
15 /* String hashes functions chooser. */
/* #define MIX_HASH */ /* Much stronger, but slower */
17 #define X31_HASH /* Weaker but faster */
/* We provide common infrastructure for hashing - each hash consists from one
 * particularly large array full of small lists of keys with same index in the
 * array (same hash value). */

/* hash_size(n) is the number of buckets in a table @n bits wide; since that
 * is always a power of two, hash_mask(n) maps any hash value onto a valid
 * bucket index with a cheap bitwise AND instead of a modulo. */
#define hash_mask(n) (hash_size(n) - 1)
#define hash_size(n) (1 << (n))
27 static hash_value_T strhash(unsigned char *k, unsigned int length, hash_value_T initval);
29 static inline struct hash *
30 init_hash(unsigned int width, hash_func_T func)
32 struct hash *hash;
33 unsigned int i = 0;
35 assert(width > 0 && func);
36 if_assert_failed return NULL;
38 /* One is already reserved in struct hash, so use size - 1. */
39 hash = mem_alloc(sizeof(*hash) + (hash_size(width) - 1)
40 * sizeof(struct list_head));
41 if (!hash) return NULL;
43 hash->width = width;
44 hash->func = func;
46 /* Initialize dummy list_heads */
47 for (; i < hash_size(width); i++)
48 init_list(hash->hash[i]);
50 return hash;
53 struct hash *
54 init_hash8(void)
56 return init_hash(8, &strhash);
59 void
60 free_hash(struct hash **hashp)
62 unsigned int i = 0;
64 assert(hashp && *hashp);
65 if_assert_failed return;
67 for (; i < hash_size((*hashp)->width); i++)
68 free_list((*hashp)->hash[i]);
70 mem_free_set(hashp, NULL);
/* I don't have much of an idea what to set here... I don't think it matters
 * much anyway. ;) --pasky */
76 #define HASH_MAGIC 0xdeadbeef
78 /* Returns hash_item if ok, NULL if error. */
79 struct hash_item *
80 add_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen,
81 void *value)
83 hash_value_T hashval;
84 struct hash_item *item = mem_alloc(sizeof(*item));
86 if (!item) return NULL;
88 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
90 item->key = key;
91 item->keylen = keylen;
92 item->value = value;
94 add_to_list(hash->hash[hashval], item);
96 return item;
99 struct hash_item *
100 get_hash_item(struct hash *hash, unsigned char *key, unsigned int keylen)
102 struct list_head *list;
103 struct hash_item *item;
104 hash_value_T hashval;
106 hashval = hash->func(key, keylen, HASH_MAGIC) & hash_mask(hash->width);
107 list = &hash->hash[hashval];
109 foreach (item, *list) {
110 if (keylen != item->keylen) continue;
111 if (memcmp(key, item->key, keylen)) continue;
113 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
114 * the list by popularity of its items. Inspired from Links,
115 * probably PerM. --pasky */
116 move_to_top_of_list(*list, item);
118 return item;
121 return NULL;
124 #undef HASH_MAGIC
126 /* If key and/or value were dynamically allocated, think about freeing them.
127 * This function doesn't do that. */
128 void
129 del_hash_item(struct hash *hash, struct hash_item *item)
131 assert(item);
132 if_assert_failed return;
134 del_from_list(item);
135 mem_free(item);
139 #ifdef X31_HASH
141 /* Fast string hashing. */
142 static hash_value_T
143 strhash(unsigned char *k, /* the key */
144 unsigned int length, /* the length of the key */
145 hash_value_T initval /* the previous hash, or an arbitrary value */)
147 const unsigned char *p = (const unsigned char *) k;
148 hash_value_T h = initval;
149 unsigned int i = 0;
151 assert(k && length > 0);
152 if_assert_failed return h;
154 /* This loop was unrolled, should improve performance on most cpus,
155 * After some tests, 8 seems a good value for most keys. --Zas */
156 for (;;) {
157 h = (h << 5) - h + p[i];
158 if (++i == length) break;
159 h = (h << 5) - h + p[i];
160 if (++i == length) break;
161 h = (h << 5) - h + p[i];
162 if (++i == length) break;
163 h = (h << 5) - h + p[i];
164 if (++i == length) break;
165 h = (h << 5) - h + p[i];
166 if (++i == length) break;
167 h = (h << 5) - h + p[i];
168 if (++i == length) break;
169 h = (h << 5) - h + p[i];
170 if (++i == length) break;
171 h = (h << 5) - h + p[i];
172 if (++i == length) break;
175 return h;
178 #else /* X31_HASH */
180 /* String hashing function follows; it is not written by me, somewhere below
181 * are credits. I only hacked it a bit. --pasky */
183 /* This is a big CPU hog, in fact:
185 * % cumulative self self total-----------
186 * time seconds seconds calls us/call us/call name----
187 * 6.00 0.35 0.06 10126 5.93 5.93 strhash
189 * It should be investigated whether we couldn't push this down a little. */
/*
 * --------------------------------------------------------------------
193 * mix -- mix 3 32-bit values reversibly.
194 * For every delta with one or two bits set, and the deltas of all three
195 * high bits or all three low bits, whether the original value of a,b,c
196 * is almost all zero or is uniformly distributed,
197 * * If mix() is run forward or backward, at least 32 bits in a,b,c
198 * have at least 1/4 probability of changing.
199 * * If mix() is run forward, every bit of c will change between 1/3 and
200 * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
201 * mix() was built out of 36 single-cycle latency instructions in a
202 * structure that could supported 2x parallelism, like so:
203 * a -= b;
204 * a -= c; x = (c>>13);
205 * b -= c; a ^= x;
206 * b -= a; x = (a<<8);
207 * c -= a; b ^= x;
208 * c -= b; x = (b>>13);
209 * ...
210 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
211 * of that parallelism. They've also turned some of those single-cycle
212 * latency instructions into multi-cycle latency instructions. Still,
213 * this is the fastest good hash I could find. There were about 2^^68
214 * to choose from. I only looked at a billion or so.
 * --------------------------------------------------------------------
 */
/* Bob Jenkins' mix(): reversibly mingle three 32-bit values @a, @b, @c
 * in place.  Wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe after an unbraced if, and requires the
 * trailing semicolon). */
#define mix(a, b, c) do { \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12); \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3); \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
} while (0)
/*
--------------------------------------------------------------------
232 hash() -- hash a variable-length key into a 32-bit value
233 k : the key (the unaligned variable-length array of bytes)
234 len : the length of the key, counting by bytes
235 initval : can be any 4-byte value
236 Returns a 32-bit value. Every bit of the key affects every bit of
237 the return value. Every 1-bit and 2-bit delta achieves avalanche.
238 About 6*len+35 instructions.
240 The best hash table sizes are powers of 2. There is no need to do
241 mod a prime (mod is sooo slow!). If you need less than 32 bits,
242 use a bitmask. For example, if you need only 10 bits, do
243 h = (h & hashmask(10));
244 In which case, the hash table should have hashsize(10) elements.
246 If you are hashing n strings (ub1 **) k, do it like this:
247 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
249 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
250 code any way you wish, private, educational, or commercial. It's free.
252 See http://burtleburtle.net/bob/hash/evahash.html
253 Use for hash table lookup, or anything where one collision in 2^^32 is
254 acceptable. Do NOT use for cryptographic purposes.
--------------------------------------------------------------------
*/
/* Assemble four consecutive key bytes starting at offset (a) into one
 * 32-bit little-endian word.  Relies on a variable `k' (unsigned char *)
 * being in scope at the expansion site, with at least (a)+4 readable
 * bytes. */
#define keycompute(a) ((k[(a)]) \
		+ ((hash_value_T) (k[(a)+1])<<8) \
		+ ((hash_value_T) (k[(a)+2])<<16) \
		+ ((hash_value_T) (k[(a)+3])<<24))
263 static hash_value_T
264 strhash(unsigned char *k, /* the key */
265 unsigned int length, /* the length of the key */
266 hash_value_T initval /* the previous hash, or an arbitrary value */)
268 int len;
269 hash_value_T a, b, c;
271 /* Set up the internal state */
272 len = length;
273 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
274 c = initval; /* the previous hash value */
276 /*---------------------------------------- handle most of the key */
277 while (len >= 12) {
278 a += keycompute(0);
279 b += keycompute(4);
280 c += keycompute(8);
281 mix(a, b, c);
282 k += 12;
283 len -= 12;
286 /*------------------------------------- handle the last 11 bytes */
287 c += length;
288 switch (len) { /* all the case statements fall through */
289 case 11: c += ((hash_value_T) (k[10])<<24);
290 case 10: c += ((hash_value_T) (k[9])<<16);
291 case 9 : c += ((hash_value_T) (k[8])<<8);
292 /* the first byte of c is reserved for the length */
293 case 8 : b += ((hash_value_T) (k[7])<<24);
294 case 7 : b += ((hash_value_T) (k[6])<<16);
295 case 6 : b += ((hash_value_T) (k[5])<<8);
296 case 5 : b += (k[4]);
297 case 4 : a += ((hash_value_T) (k[3])<<24);
298 case 3 : a += ((hash_value_T) (k[2])<<16);
299 case 2 : a += ((hash_value_T) (k[1])<<8);
300 case 1 : a += (k[0]);
301 /* case 0: nothing left to add */
304 mix(a, b, c);
306 /*-------------------------------------------- report the result */
307 return c;
310 #undef keycompute
311 #undef mix
312 #undef hash_mask
#endif /* X31_HASH */