/*
 * lookup3.c -- imported into fast-recs-collate
 * (gitweb snapshot: commit "add BSD license",
 *  blob d968a920fc11620435b2ae3d174cd34846ee9337)
 */
/*
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  Routines to test the hash are included
if SELF_TEST is defined.  You can use this free for any purpose.  It's in
the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
little-endian machines.  Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1;  b = i2;  c = i3;
  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hashword().  If you have a byte array (like
a character string), use hashlittle().  If you have several byte arrays, or
a mix of things, see the comments above hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/
#include <stddef.h>     /* size_t */
#include <stdint.h>     /* defines uint32_t etc */
#include <sys/param.h>  /* attempt to define endianness */
#ifdef linux
# include <endian.h>    /* attempt to define endianness */
#endif

/*
 * When VALGRIND is defined, the tail of an aligned key is read one byte
 * at a time instead of as a masked whole word, so memory-checking tools
 * do not report a (harmless) read past the end of the buffer.
 */
#define VALGRIND

/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.
 */
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif

#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
    4  6  8 16 19  4
    9 15  3 18 27 15
   14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

These constants passed:
 14 11 25 16 4 14 24
 12 14 25 16 4 14 24
and these came close:
  4  8 15 26 3 22 24
 10  8 15 26 3 22 24
 11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/
uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;                          /* internal state */
  union { const void *ptr; size_t i; } u;  /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;  /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;  /* zero length strings require no mixing */
    }

#else /* make valgrind happy */
    {
      const uint8_t *k8 = (const uint8_t *)k;
      switch(length)
      {
      case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
      case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
      case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
      case 9 : c+=k8[8];                   /* fall through */
      case 8 : b+=k[1]; a+=k[0]; break;
      case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
      case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
      case 5 : b+=k8[4];                   /* fall through */
      case 4 : a+=k[0]; break;
      case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
      case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
      case 1 : a+=k8[0]; break;
      case 0 : return c;
      }
    }
#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;  /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;  /* zero length requires no mixing */
    }

  } else {                      /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)  /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;  /* fall through */
    case 11: c+=((uint32_t)k[10])<<16;  /* fall through */
    case 10: c+=((uint32_t)k[9])<<8;    /* fall through */
    case 9 : c+=k[8];                   /* fall through */
    case 8 : b+=((uint32_t)k[7])<<24;   /* fall through */
    case 7 : b+=((uint32_t)k[6])<<16;   /* fall through */
    case 6 : b+=((uint32_t)k[5])<<8;    /* fall through */
    case 5 : b+=k[4];                   /* fall through */
    case 4 : a+=((uint32_t)k[3])<<24;   /* fall through */
    case 3 : a+=((uint32_t)k[2])<<16;   /* fall through */
    case 2 : a+=((uint32_t)k[1])<<8;    /* fall through */
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}