nmdb/hash.h

   1
   2 #ifndef _HASH_H
   3 #define _HASH_H
   4
   5 /*
   6  * Hash function used in the cache.
   7  *
   8  * This is kept here instead of a .c file so the compiler can inline if it
   9  * decides it's worth it.
  10  *
  11  * The hash function used is Austin Appleby's MurmurHash2. It used to be
  12  * Jenkins's one-at-a-time, but this one is much faster. However, we keep
  13  * the others that were tested for future comparison purposes.
  14  */
  15 #define hash(k, s) murmurhash2(k, s)
  16
  17 /* MurmurHash2, by Austin Appleby. The one we use.
  18  * It has been modify to fit into the coding style, to work on uint32_t
  19  * instead of ints, and the seed was fixed to a random number because it's not
  20  * an issue for us. The author placed it in the public domain, so it's safe.
  21  * http://sites.google.com/site/murmurhash/ */
  22 static uint32_t murmurhash2(const unsigned char *key, size_t len)
  23 {
  24         const uint32_t m = 0x5bd1e995;
  25         const int r = 24;
  26         const uint32_t seed = 0x34a4b627;
  27
  28         // Initialize the hash to a 'random' value
  29         uint32_t h = seed ^ len;
  30
  31         // Mix 4 bytes at a time into the hash
  32         while (len >= 4) {
  33                 uint32_t k = *(uint32_t *) key;
  34
  35                 k *= m;
  36                 k ^= k >> r;
  37                 k *= m;
  38
  39                 h *= m;
  40                 h ^= k;
  41
  42                 key += 4;
  43                 len -= 4;
  44         }
  45
  46         // Handle the last few bytes of the input array
  47         switch (len) {
  48                 case 3: h ^= key[2] << 16;
  49                 case 2: h ^= key[1] << 8;
  50                 case 1: h ^= key[0];
  51                         h *= m;
  52         }
  53
  54         // Do a few final mixes of the hash to ensure the last few
  55         // bytes are well-incorporated.
  56         h ^= h >> 13;
  57         h *= m;
  58         h ^= h >> 15;
  59
  60         return h;
  61 }
  62
  63
  64 /* Unused functions, left for comparison purposes */
  65 #if 0
  66
  67 /* Paul Hsieh's SuperFastHash, which is really fast, but a tad slower than
  68  * MurmurHash2.
  69  * http://www.azillionmonkeys.com/qed/hash.html */
  70 static uint32_t superfast(const unsigned char *data, size_t len)
  71 {
  72         uint32_t h = len, tmp;
  73         int rem;
  74
  75         /* this is the same as (*((const uint16_t *) (d))) on little endians,
  76          * but we keep the generic version since gcc compiles decent code for
  77          * both and there's no noticeable difference in performance */
  78         #define get16bits(d) ( \
  79                 ( ( (uint32_t) ( ((const uint8_t *)(d))[1] ) ) << 8 ) \
  80                 + (uint32_t) ( ((const uint8_t *)(d))[0] ) )
  81
  82         if (len <= 0 || data == NULL)
  83                 return 0;
  84
  85         rem = len & 3;
  86         len >>= 2;
  87
  88         /* Main loop */
  89         for (; len > 0; len--) {
  90                 h += get16bits(data);
  91                 tmp = (get16bits(data+2) << 11) ^ h;
  92                 h = (h << 16) ^ tmp;
  93                 data += 2*sizeof (uint16_t);
  94                 h += h >> 11;
  95         }
  96
  97         /* Handle end cases */
  98         switch (rem) {
  99                 case 3: h += get16bits(data);
 100                         h ^= h << 16;
 101                         h ^= data[sizeof(uint16_t)] << 18;
 102                         h += h >> 11;
 103                         break;
 104                 case 2: h += get16bits(data);
 105                         h ^= h << 11;
 106                         h += h >> 17;
 107                         break;
 108                 case 1: h += *data;
 109                         h ^= h << 10;
 110                         h += h >> 1;
 111         }
 112
 113         /* Force "avalanching" of final 127 bits */
 114         h ^= h << 3;
 115         h += h >> 5;
 116         h ^= h << 4;
 117         h += h >> 17;
 118         h ^= h << 25;
 119         h += h >> 6;
 120
 121         return h;
 122
 123         #undef get16bits
 124 }
 125
 126 /* Jenkins' one-at-a-time hash, the one we used to use.
 127  * http://en.wikipedia.org/wiki/Jenkins_hash_function */
 128 static uint32_t oneatatime(const unsigned char *key, const size_t ksize)
 129 {
 130         uint32_t h = 0;
 131         size_t i;
 132
 133         for (i = 0; i < ksize; i++) {
 134                 h += key[i];
 135                 h += (h << 10);
 136                 h ^= (h >> 6);
 137         }
 138         h += (h << 3);
 139         h ^= (h >> 11);
 140         h += (h << 15);
 141         return h;
 142 }
 143
 144 /* FNV 32 bit hash; slower than the rest.
 145  * http://www.isthe.com/chongo/tech/comp/fnv/ */
 146 static uint32_t fnv_hash(const unsigned char *key, const size_t ksize)
 147 {
 148         const unsigned char *end = key + ksize;
 149         uint32_t hval;
 150
 151         hval = 0x811c9dc5;
 152         while (key < end) {
 153                 hval += (hval<<1) + (hval<<4) + (hval<<7) +
 154                         (hval<<8) + (hval<<24);
 155                 hval ^= (uint32_t) *key++;
 156         }
 157
 158         return hval;
 159 }
 160
 161 #endif
 162
 163 #endif
 164