1 /* Hashing infrastructure */
11 #include "util/hash.h"
12 #include "util/memory.h"
/* String hash function chooser. */
/* #define MIX_HASH */ /* Far better but slower */
17 #define X31_HASH /* Weaker but faster */
/* We provide common infrastructure for hashing - each hash consists of one
 * fairly large array full of small lists of keys with the same index in the
 * array (same hash value). */
/* Bitmask that reduces a raw hash value to a valid bucket index for a table
 * of hash_size(n) buckets (the table size is a power of two). */
#define hash_mask(n) (hash_size(n) - 1)
27 init_hash(unsigned int width
, hash_func_T func
)
32 assert(width
> 0 && func
);
33 if_assert_failed
return NULL
;
35 /* One is already reserved in struct hash, so use size - 1. */
36 hash
= mem_alloc(sizeof(*hash
) + (hash_size(width
) - 1)
37 * sizeof(struct list_head
));
38 if (!hash
) return NULL
;
43 /* Initialize dummy list_heads */
44 for (; i
< hash_size(width
); i
++)
45 init_list(hash
->hash
[i
]);
51 free_hash(struct hash
*hash
)
56 if_assert_failed
return;
58 for (; i
< hash_size(hash
->width
); i
++)
59 free_list(hash
->hash
[i
]);
/* I have no strong idea about what to set here... I think it does not matter
 * much anyway. ;) --pasky */
/* Arbitrary seed passed as the initval argument to the hash function. */
#define HASH_MAGIC 0xdeadbeef
69 /* Returns hash_item if ok, NULL if error. */
71 add_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
,
75 struct hash_item
*item
= mem_alloc(sizeof(*item
));
77 if (!item
) return NULL
;
79 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
82 item
->keylen
= keylen
;
85 add_to_list(hash
->hash
[hashval
], item
);
91 get_hash_item(struct hash
*hash
, unsigned char *key
, unsigned int keylen
)
93 struct list_head
*list
;
94 struct hash_item
*item
;
97 hashval
= hash
->func(key
, keylen
, HASH_MAGIC
) & hash_mask(hash
->width
);
98 list
= &hash
->hash
[hashval
];
100 foreach (item
, *list
) {
101 if (keylen
!= item
->keylen
) continue;
102 if (memcmp(key
, item
->key
, keylen
)) continue;
104 /* Watch the MFR (Move Front Rule)! Basically, it self-orders
105 * the list by popularity of its items. Inspired from Links,
106 * probably PerM. --pasky */
107 move_to_top_of_list(*list
, item
);
117 /* If key and/or value were dynamically allocated, think about freeing them.
118 * This function doesn't do that. */
120 del_hash_item(struct hash
*hash
, struct hash_item
*item
)
123 if_assert_failed
return;
132 /* Fast string hashing. */
134 strhash(unsigned char *k
, /* the key */
135 unsigned int length
, /* the length of the key */
136 hash_value_T initval
/* the previous hash, or an arbitrary value */)
138 const unsigned char *p
= (const unsigned char *) k
;
139 hash_value_T h
= initval
;
142 assert(k
&& length
> 0);
143 if_assert_failed
return h
;
145 /* This loop was unrolled, should improve performance on most cpus,
146 * After some tests, 8 seems a good value for most keys. --Zas */
148 h
= (h
<< 5) - h
+ p
[i
];
149 if (++i
== length
) break;
150 h
= (h
<< 5) - h
+ p
[i
];
151 if (++i
== length
) break;
152 h
= (h
<< 5) - h
+ p
[i
];
153 if (++i
== length
) break;
154 h
= (h
<< 5) - h
+ p
[i
];
155 if (++i
== length
) break;
156 h
= (h
<< 5) - h
+ p
[i
];
157 if (++i
== length
) break;
158 h
= (h
<< 5) - h
+ p
[i
];
159 if (++i
== length
) break;
160 h
= (h
<< 5) - h
+ p
[i
];
161 if (++i
== length
) break;
162 h
= (h
<< 5) - h
+ p
[i
];
163 if (++i
== length
) break;
/* String hashing function follows; it is not written by me, somewhere below
 * are credits. I only hacked it a bit. --pasky */

/* This is a big CPU hog, in fact:
 *
 *   %   cumulative    self               self     total
 *  time   seconds   seconds    calls   us/call  us/call  name
 *  6.00     0.35      0.06     10126     5.93     5.93   strhash
 *
 * It should be investigated whether we couldn't push this down a little. */
/*
 * --------------------------------------------------------------------
 * mix -- mix 3 32-bit values reversibly.
 * For every delta with one or two bits set, and the deltas of all three
 * high bits or all three low bits, whether the original value of a,b,c
 * is almost all zero or is uniformly distributed,
 * * If mix() is run forward or backward, at least 32 bits in a,b,c
 *   have at least 1/4 probability of changing.
 * * If mix() is run forward, every bit of c will change between 1/3 and
 *   2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
 * mix() was built out of 36 single-cycle latency instructions in a
 * structure that could support 2x parallelism, like so:
 *      a -= c; x = (c>>13);
 *      b -= a; x = (a<<8);
 *      c -= b; x = (b>>13);
 * Unfortunately, superscalar Pentiums and Sparcs can't take advantage
 * of that parallelism. They've also turned some of those single-cycle
 * latency instructions into multi-cycle latency instructions. Still,
 * this is the fastest good hash I could find. There were about 2^^68
 * to choose from. I only looked at a billion or so.
 * --------------------------------------------------------------------
 */
/* mix() -- mix three 32-bit values reversibly (Bob Jenkins).  Note the
 * closing brace: the macro body is a compound statement, so invoke it as
 * mix(a, b, c); at statement position. */
#define mix(a, b, c) { \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12); \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3); \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
}
/*
 * --------------------------------------------------------------------
 * hash() -- hash a variable-length key into a 32-bit value
 *   k       : the key (the unaligned variable-length array of bytes)
 *   len     : the length of the key, counting by bytes
 *   initval : can be any 4-byte value
 * Returns a 32-bit value. Every bit of the key affects every bit of
 * the return value. Every 1-bit and 2-bit delta achieves avalanche.
 * About 6*len+35 instructions.
 *
 * The best hash table sizes are powers of 2. There is no need to do
 * mod a prime (mod is sooo slow!). If you need less than 32 bits,
 * use a bitmask. For example, if you need only 10 bits, do
 *   h = (h & hashmask(10));
 * In which case, the hash table should have hashsize(10) elements.
 *
 * If you are hashing n strings (ub1 **) k, do it like this:
 *   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
 *
 * By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
 * code any way you wish, private, educational, or commercial. It's free.
 *
 * See http://burtleburtle.net/bob/hash/evahash.html
 * Use for hash table lookup, or anything where one collision in 2^^32 is
 * acceptable. Do NOT use for cryptographic purposes.
 * --------------------------------------------------------------------
 */
/* Assemble a 32-bit little-endian word from the 4 key bytes at offset (a);
 * relies on a local `k` (unsigned char *) being in scope at the call site. */
#define keycompute(a) ((k[(a)]) \
		       + ((hash_value_T) (k[(a)+1])<<8) \
		       + ((hash_value_T) (k[(a)+2])<<16) \
		       + ((hash_value_T) (k[(a)+3])<<24))
/* Bob Jenkins' lookup hash (MIX_HASH variant of strhash; see credits above).
 * Guarded so it is only compiled when MIX_HASH is selected instead of the
 * X31 variant.
 * NOTE(review): reconstructed from a mangled extraction; the 12-byte main
 * loop and the final mix()/return were not visible — confirm upstream. */
#ifdef MIX_HASH
hash_value_T
strhash(unsigned char *k,	/* the key */
	unsigned int length,	/* the length of the key */
	hash_value_T initval	/* the previous hash, or an arbitrary value */)
{
	hash_value_T a, b, c;
	unsigned int len = length;

	/* Set up the internal state */
	a = b = 0x9e3779b9;	/* the golden ratio; an arbitrary value */
	c = initval;		/* the previous hash value */

	/*---------------------------------------- handle most of the key */
	while (len >= 12) {
		a += keycompute(0);
		b += keycompute(4);
		c += keycompute(8);
		mix(a, b, c);
		k += 12;
		len -= 12;
	}

	/*------------------------------------- handle the last 11 bytes */
	c += length;
	switch (len) {	/* all the case statements fall through */
		case 11: c += ((hash_value_T) (k[10])<<24);
		case 10: c += ((hash_value_T) (k[9])<<16);
		case 9 : c += ((hash_value_T) (k[8])<<8);
		/* the first byte of c is reserved for the length */
		case 8 : b += ((hash_value_T) (k[7])<<24);
		case 7 : b += ((hash_value_T) (k[6])<<16);
		case 6 : b += ((hash_value_T) (k[5])<<8);
		case 5 : b += (k[4]);
		case 4 : a += ((hash_value_T) (k[3])<<24);
		case 3 : a += ((hash_value_T) (k[2])<<16);
		case 2 : a += ((hash_value_T) (k[1])<<8);
		case 1 : a += (k[0]);
		/* case 0: nothing left to add */
	}

	mix(a, b, c);

	/*-------------------------------------------- report the result */
	return c;
}
#endif /* MIX_HASH */