1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 func TestMemHash32Equality(t
*testing
.T
) {
19 t
.Skip("skipping since AES hash implementation is used")
22 r
:= rand
.New(rand
.NewSource(1234))
23 seed
:= uintptr(r
.Uint64())
24 for i
:= 0; i
< 100; i
++ {
26 got
:= MemHash32(unsafe
.Pointer(&b
), seed
)
27 want
:= MemHash(unsafe
.Pointer(&b
), seed
, 4)
29 t
.Errorf("MemHash32(%x, %v) = %v; want %v", b
, seed
, got
, want
)
34 func TestMemHash64Equality(t
*testing
.T
) {
36 t
.Skip("skipping since AES hash implementation is used")
39 r
:= rand
.New(rand
.NewSource(1234))
40 seed
:= uintptr(r
.Uint64())
41 for i
:= 0; i
< 100; i
++ {
43 got
:= MemHash64(unsafe
.Pointer(&b
), seed
)
44 want
:= MemHash(unsafe
.Pointer(&b
), seed
, 8)
46 t
.Errorf("MemHash64(%x, %v) = %v; want %v", b
, seed
, got
, want
)
51 // Smhasher is a torture test for hash functions.
52 // https://code.google.com/p/smhasher/
53 // This code is a port of some of the Smhasher tests to Go.
55 // The current AES hash function passes Smhasher. Our fallback
56 // hash functions don't, so we only enable the difficult tests when
57 // we know the AES implementation is available.
60 // hash should not depend on values outside key.
61 // hash should not depend on alignment.
62 func TestSmhasherSanity(t
*testing
.T
) {
63 r
:= rand
.New(rand
.NewSource(1234))
68 for k
:= 0; k
< REP
; k
++ {
69 for n
:= 0; n
< KEYMAX
; n
++ {
70 for i
:= 0; i
< OFFMAX
; i
++ {
71 var b
[KEYMAX
+ OFFMAX
+ 2*PAD
]byte
72 var c
[KEYMAX
+ OFFMAX
+ 2*PAD
]byte
75 copy(c
[PAD
+i
:PAD
+i
+n
], b
[PAD
:PAD
+n
])
76 if BytesHash(b
[PAD
:PAD
+n
], 0) != BytesHash(c
[PAD
+i
:PAD
+i
+n
], 0) {
77 t
.Errorf("hash depends on bytes outside key")
85 m
map[uintptr]struct{} // set of hashes added
86 n
int // number of hashes added
89 func newHashSet() *HashSet
{
90 return &HashSet
{make(map[uintptr]struct{}), 0}
92 func (s
*HashSet
) add(h
uintptr) {
96 func (s
*HashSet
) addS(x
string) {
97 s
.add(StringHash(x
, 0))
99 func (s
*HashSet
) addB(x
[]byte) {
100 s
.add(BytesHash(x
, 0))
102 func (s
*HashSet
) addS_seed(x
string, seed
uintptr) {
103 s
.add(StringHash(x
, seed
))
105 func (s
*HashSet
) check(t
*testing
.T
) {
107 collisions
:= s
.n
- len(s
.m
)
108 //fmt.Printf("%d/%d\n", len(s.m), s.n)
109 pairs
:= int64(s
.n
) * int64(s
.n
-1) / 2
110 expected
:= float64(pairs
) / math
.Pow(2.0, float64(hashSize
))
111 stddev
:= math
.Sqrt(expected
)
112 if float64(collisions
) > expected
+SLOP
*(3*stddev
+1) {
113 t
.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions
, expected
, stddev
)
117 // a string plus adding zeros must make distinct hashes
118 func TestSmhasherAppendedZeros(t
*testing
.T
) {
119 s
:= "hello" + strings
.Repeat("\x00", 256)
121 for i
:= 0; i
<= len(s
); i
++ {
127 // All 0-3 byte strings have distinct hashes.
128 func TestSmhasherSmallKeys(t
*testing
.T
) {
131 for i
:= 0; i
< 256; i
++ {
134 for j
:= 0; j
< 256; j
++ {
137 if !testing
.Short() {
138 for k
:= 0; k
< 256; k
++ {
148 // Different length strings of all zeros have distinct hashes.
149 func TestSmhasherZeros(t
*testing
.T
) {
156 for i
:= 0; i
<= N
; i
++ {
162 // Strings with up to two nonzero bytes all have distinct hashes.
163 func TestSmhasherTwoNonzero(t
*testing
.T
) {
165 t
.Skip("Skipping in short mode")
168 for n
:= 2; n
<= 16; n
++ {
173 func twoNonZero(h
*HashSet
, n
int) {
180 for i
:= 0; i
< n
; i
++ {
181 for x
:= 1; x
< 256; x
++ {
188 // two non-zero bytes
189 for i
:= 0; i
< n
; i
++ {
190 for x
:= 1; x
< 256; x
++ {
192 for j
:= i
+ 1; j
< n
; j
++ {
193 for y
:= 1; y
< 256; y
++ {
204 // Test strings with repeats, like "abcdabcdabcdabcd..."
205 func TestSmhasherCyclic(t
*testing
.T
) {
207 t
.Skip("Skipping in short mode")
209 r
:= rand
.New(rand
.NewSource(1234))
212 for n
:= 4; n
<= 12; n
++ {
214 b
:= make([]byte, REPEAT
*n
)
215 for i
:= 0; i
< N
; i
++ {
216 b
[0] = byte(i
* 79 % 97)
217 b
[1] = byte(i
* 43 % 137)
218 b
[2] = byte(i
* 151 % 197)
219 b
[3] = byte(i
* 199 % 251)
221 for j
:= n
; j
< n
*REPEAT
; j
++ {
230 // Test strings with only a few bits set
231 func TestSmhasherSparse(t
*testing
.T
) {
233 t
.Skip("Skipping in short mode")
244 func sparse(t
*testing
.T
, n
int, k
int) {
245 b
:= make([]byte, n
/8)
251 // set up to k bits at index i and greater
252 func setbits(h
*HashSet
, b
[]byte, i
int, k
int) {
257 for j
:= i
; j
< len(b
)*8; j
++ {
258 b
[j
/8] |
= byte(1 << uint(j
&7))
259 setbits(h
, b
, j
+1, k
-1)
260 b
[j
/8] &= byte(^(1 << uint(j
&7)))
264 // Test all possible combinations of n blocks from the set s.
265 // "permutation" is a bad name here, but it is what Smhasher uses.
266 func TestSmhasherPermutation(t
*testing
.T
) {
268 t
.Skip("Skipping in short mode")
270 permutation(t
, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
271 permutation(t
, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
272 permutation(t
, []uint32{0, 1}, 20)
273 permutation(t
, []uint32{0, 1 << 31}, 20)
274 permutation(t
, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
276 func permutation(t
*testing
.T
, s
[]uint32, n
int) {
277 b
:= make([]byte, n
*4)
282 func genPerm(h
*HashSet
, b
[]byte, s
[]uint32, n
int) {
287 for _
, v
:= range s
{
289 b
[n
+1] = byte(v
>> 8)
290 b
[n
+2] = byte(v
>> 16)
291 b
[n
+3] = byte(v
>> 24)
292 genPerm(h
, b
, s
, n
+4)
297 clear() // set bits all to 0
298 random(r
*rand
.Rand
) // set key to something random
299 bits() int // how many bits key has
300 flipBit(i
int) // flip bit i of the key
301 hash() uintptr // hash the key
302 name() string // for error reporting
305 type BytesKey
struct {
309 func (k
*BytesKey
) clear() {
314 func (k
*BytesKey
) random(r
*rand
.Rand
) {
317 func (k
*BytesKey
) bits() int {
320 func (k
*BytesKey
) flipBit(i
int) {
321 k
.b
[i
>>3] ^= byte(1 << uint(i
&7))
323 func (k
*BytesKey
) hash() uintptr {
324 return BytesHash(k
.b
, 0)
326 func (k
*BytesKey
) name() string {
327 return fmt
.Sprintf("bytes%d", len(k
.b
))
330 type Int32Key
struct {
334 func (k
*Int32Key
) clear() {
337 func (k
*Int32Key
) random(r
*rand
.Rand
) {
340 func (k
*Int32Key
) bits() int {
343 func (k
*Int32Key
) flipBit(i
int) {
346 func (k
*Int32Key
) hash() uintptr {
347 return Int32Hash(k
.i
, 0)
349 func (k
*Int32Key
) name() string {
353 type Int64Key
struct {
357 func (k
*Int64Key
) clear() {
360 func (k
*Int64Key
) random(r
*rand
.Rand
) {
361 k
.i
= uint64(r
.Uint32()) + uint64(r
.Uint32())<<32
363 func (k
*Int64Key
) bits() int {
366 func (k
*Int64Key
) flipBit(i
int) {
369 func (k
*Int64Key
) hash() uintptr {
370 return Int64Hash(k
.i
, 0)
372 func (k
*Int64Key
) name() string {
376 type EfaceKey
struct {
380 func (k
*EfaceKey
) clear() {
383 func (k
*EfaceKey
) random(r
*rand
.Rand
) {
384 k
.i
= uint64(r
.Int63())
386 func (k
*EfaceKey
) bits() int {
387 // use 64 bits. This tests inlined interfaces
388 // on 64-bit targets and indirect interfaces on
392 func (k
*EfaceKey
) flipBit(i
int) {
393 k
.i
= k
.i
.(uint64) ^ uint64(1)<<uint(i
)
395 func (k
*EfaceKey
) hash() uintptr {
396 return EfaceHash(k
.i
, 0)
398 func (k
*EfaceKey
) name() string {
402 type IfaceKey
struct {
409 func (x fInter
) F() {
412 func (k
*IfaceKey
) clear() {
415 func (k
*IfaceKey
) random(r
*rand
.Rand
) {
416 k
.i
= fInter(r
.Int63())
418 func (k
*IfaceKey
) bits() int {
419 // use 64 bits. This tests inlined interfaces
420 // on 64-bit targets and indirect interfaces on
424 func (k
*IfaceKey
) flipBit(i
int) {
425 k
.i
= k
.i
.(fInter
) ^ fInter(1)<<uint(i
)
427 func (k
*IfaceKey
) hash() uintptr {
428 return IfaceHash(k
.i
, 0)
430 func (k
*IfaceKey
) name() string {
434 // Flipping a single bit of a key should flip each output bit with 50% probability.
435 func TestSmhasherAvalanche(t
*testing
.T
) {
437 t
.Skip("Skipping in short mode")
439 avalancheTest1(t
, &BytesKey
{make([]byte, 2)})
440 avalancheTest1(t
, &BytesKey
{make([]byte, 4)})
441 avalancheTest1(t
, &BytesKey
{make([]byte, 8)})
442 avalancheTest1(t
, &BytesKey
{make([]byte, 16)})
443 avalancheTest1(t
, &BytesKey
{make([]byte, 32)})
444 avalancheTest1(t
, &BytesKey
{make([]byte, 200)})
445 avalancheTest1(t
, &Int32Key
{})
446 avalancheTest1(t
, &Int64Key
{})
447 avalancheTest1(t
, &EfaceKey
{})
448 avalancheTest1(t
, &IfaceKey
{})
450 func avalancheTest1(t
*testing
.T
, k Key
) {
452 r
:= rand
.New(rand
.NewSource(1234))
455 // grid[i][j] is a count of whether flipping
456 // input bit i affects output bit j.
457 grid
:= make([][hashSize
]int, n
)
459 for z
:= 0; z
< REP
; z
++ {
460 // pick a random key, hash it
464 // flip each bit, hash & compare the results
465 for i
:= 0; i
< n
; i
++ {
470 // record the effects of that bit flip
472 for j
:= 0; j
< hashSize
; j
++ {
479 // Each entry in the grid should be about REP/2.
480 // More precisely, we did N = k.bits() * hashSize experiments where
481 // each is the sum of REP coin flips. We want to find bounds on the
482 // sum of coin flips such that a truly random experiment would have
483 // all sums inside those bounds with 99% probability.
486 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
487 for c
= 0.0; math
.Pow(math
.Erf(c
/math
.Sqrt(2)), float64(N
)) < .9999; c
+= .1 {
489 c
*= 4.0 // allowed slack - we don't need to be perfectly random
491 stddev
:= .5 * math
.Sqrt(REP
)
492 low
:= int(mean
- c
*stddev
)
493 high
:= int(mean
+ c
*stddev
)
494 for i
:= 0; i
< n
; i
++ {
495 for j
:= 0; j
< hashSize
; j
++ {
497 if x
< low || x
> high
{
498 t
.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k
.name(), i
, j
, x
, REP
)
504 // All bit rotations of a set of distinct keys
505 func TestSmhasherWindowed(t
*testing
.T
) {
506 windowed(t
, &Int32Key
{})
507 windowed(t
, &Int64Key
{})
508 windowed(t
, &BytesKey
{make([]byte, 128)})
510 func windowed(t
*testing
.T
, k Key
) {
512 t
.Skip("Skipping in short mode")
516 for r
:= 0; r
< k
.bits(); r
++ {
518 for i
:= 0; i
< 1<<BITS
; i
++ {
520 for j
:= 0; j
< BITS
; j
++ {
521 if i
>>uint(j
)&1 != 0 {
522 k
.flipBit((j
+ r
) % k
.bits())
531 // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
532 func TestSmhasherText(t
*testing
.T
) {
534 t
.Skip("Skipping in short mode")
536 text(t
, "Foo", "Bar")
537 text(t
, "FooBar", "")
538 text(t
, "", "FooBar")
540 func text(t
*testing
.T
, prefix
, suffix
string) {
542 const S
= "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
544 b
:= make([]byte, len(prefix
)+N
+len(suffix
))
546 copy(b
[len(prefix
)+N
:], suffix
)
549 for i
:= 0; i
< L
; i
++ {
551 for j
:= 0; j
< L
; j
++ {
553 for k
:= 0; k
< L
; k
++ {
555 for x
:= 0; x
< L
; x
++ {
565 // Make sure different seed values generate different hashes.
566 func TestSmhasherSeed(t
*testing
.T
) {
570 for i
:= 0; i
< N
; i
++ {
571 h
.addS_seed(s
, uintptr(i
))
576 // size of the hash output (32 or 64 bits)
577 const hashSize
= 32 + int(^uintptr(0)>>63<<5)
579 func randBytes(r
*rand
.Rand
, b
[]byte) {
581 b
[i
] = byte(r
.Uint32())
585 func benchmarkHash(b
*testing
.B
, n
int) {
586 s
:= strings
.Repeat("A", n
)
588 for i
:= 0; i
< b
.N
; i
++ {
594 func BenchmarkHash5(b
*testing
.B
) { benchmarkHash(b
, 5) }
595 func BenchmarkHash16(b
*testing
.B
) { benchmarkHash(b
, 16) }
596 func BenchmarkHash64(b
*testing
.B
) { benchmarkHash(b
, 64) }
597 func BenchmarkHash1024(b
*testing
.B
) { benchmarkHash(b
, 1024) }
598 func BenchmarkHash65536(b
*testing
.B
) { benchmarkHash(b
, 65536) }
600 func TestArrayHash(t
*testing
.T
) {
601 if Compiler
== "gccgo" {
602 t
.Skip("does not work on gccgo without better escape analysis")
605 // Make sure that "" in arrays hash correctly. The hash
606 // should at least scramble the input seed so that, e.g.,
607 // {"","foo"} and {"foo",""} have different hashes.
609 // If the hash is bad, then all (8 choose 4) = 70 keys
610 // have the same hash. If so, we allocate 70/8 = 8
611 // overflow buckets. If the hash is good we don't
612 // normally allocate any overflow buckets, and the
613 // probability of even one or two overflows goes down rapidly.
614 // (There is always 1 allocation of the bucket array. The map
615 // header is allocated on the stack.)
617 // Make the key type at most 128 bytes. Otherwise,
618 // we get an allocation per key.
620 m
:= make(map[key
]bool, 70)
622 // fill m with keys that have 4 "foo"s and 4 ""s.
623 for i
:= 0; i
< 256; i
++ {
626 for j
:= uint(0); j
< 8; j
++ {
637 t
.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m
))
640 if n
:= testing
.AllocsPerRun(10, f
); n
> 6 {
641 t
.Errorf("too many allocs %f - hash not balanced", n
)
644 func TestStructHash(t
*testing
.T
) {
645 // See the comment in TestArrayHash.
648 a
, b
, c
, d
, e
, f
, g
, h
string
650 m
:= make(map[key
]bool, 70)
652 // fill m with keys that have 4 "foo"s and 4 ""s.
653 for i
:= 0; i
< 256; i
++ {
693 t
.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m
))
696 if n
:= testing
.AllocsPerRun(10, f
); n
> 6 {
697 t
.Errorf("too many allocs %f - hash not balanced", n
)
703 func BenchmarkAlignedLoad(b
*testing
.B
) {
705 p
:= unsafe
.Pointer(&buf
[0])
707 for i
:= 0; i
< b
.N
; i
++ {
708 s
+= ReadUnaligned64(p
)
713 func BenchmarkUnalignedLoad(b
*testing
.B
) {
715 p
:= unsafe
.Pointer(&buf
[1])
717 for i
:= 0; i
< b
.N
; i
++ {
718 s
+= ReadUnaligned64(p
)
723 func TestCollisions(t
*testing
.T
) {
725 t
.Skip("Skipping in short mode")
727 for i
:= 0; i
< 16; i
++ {
728 for j
:= 0; j
< 16; j
++ {
733 m
:= make(map[uint16]struct{}, 1<<16)
734 for n
:= 0; n
< 1<<16; n
++ {
737 m
[uint16(BytesHash(a
[:], 0))] = struct{}{}
740 t
.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i
, j
, len(m
))