1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 func TestMemHash32Equality(t
*testing
.T
) {
19 t
.Skip("skipping since AES hash implementation is used")
22 r
:= rand
.New(rand
.NewSource(1234))
23 seed
:= uintptr(r
.Uint64())
24 for i
:= 0; i
< 100; i
++ {
26 got
:= MemHash32(unsafe
.Pointer(&b
), seed
)
27 want
:= MemHash(unsafe
.Pointer(&b
), seed
, 4)
29 t
.Errorf("MemHash32(%x, %v) = %v; want %v", b
, seed
, got
, want
)
34 func TestMemHash64Equality(t
*testing
.T
) {
36 t
.Skip("skipping since AES hash implementation is used")
39 r
:= rand
.New(rand
.NewSource(1234))
40 seed
:= uintptr(r
.Uint64())
41 for i
:= 0; i
< 100; i
++ {
43 got
:= MemHash64(unsafe
.Pointer(&b
), seed
)
44 want
:= MemHash(unsafe
.Pointer(&b
), seed
, 8)
46 t
.Errorf("MemHash64(%x, %v) = %v; want %v", b
, seed
, got
, want
)
51 // Smhasher is a torture test for hash functions.
52 // https://code.google.com/p/smhasher/
53 // This code is a port of some of the Smhasher tests to Go.
55 // The current AES hash function passes Smhasher. Our fallback
56 // hash functions don't, so we only enable the difficult tests when
57 // we know the AES implementation is available.
60 // hash should not depend on values outside key.
61 // hash should not depend on alignment.
62 func TestSmhasherSanity(t
*testing
.T
) {
63 r
:= rand
.New(rand
.NewSource(1234))
68 for k
:= 0; k
< REP
; k
++ {
69 for n
:= 0; n
< KEYMAX
; n
++ {
70 for i
:= 0; i
< OFFMAX
; i
++ {
71 var b
[KEYMAX
+ OFFMAX
+ 2*PAD
]byte
72 var c
[KEYMAX
+ OFFMAX
+ 2*PAD
]byte
75 copy(c
[PAD
+i
:PAD
+i
+n
], b
[PAD
:PAD
+n
])
76 if BytesHash(b
[PAD
:PAD
+n
], 0) != BytesHash(c
[PAD
+i
:PAD
+i
+n
], 0) {
77 t
.Errorf("hash depends on bytes outside key")
85 m
map[uintptr]struct{} // set of hashes added
86 n
int // number of hashes added
89 func newHashSet() *HashSet
{
90 return &HashSet
{make(map[uintptr]struct{}), 0}
92 func (s
*HashSet
) add(h
uintptr) {
96 func (s
*HashSet
) addS(x
string) {
97 s
.add(StringHash(x
, 0))
99 func (s
*HashSet
) addB(x
[]byte) {
100 s
.add(BytesHash(x
, 0))
102 func (s
*HashSet
) addS_seed(x
string, seed
uintptr) {
103 s
.add(StringHash(x
, seed
))
105 func (s
*HashSet
) check(t
*testing
.T
) {
107 collisions
:= s
.n
- len(s
.m
)
108 pairs
:= int64(s
.n
) * int64(s
.n
-1) / 2
109 expected
:= float64(pairs
) / math
.Pow(2.0, float64(hashSize
))
110 stddev
:= math
.Sqrt(expected
)
111 if float64(collisions
) > expected
+SLOP
*(3*stddev
+1) {
112 t
.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f threshold=%f", collisions
, expected
, stddev
, expected
+SLOP
*(3*stddev
+1))
116 // a string plus adding zeros must make distinct hashes
117 func TestSmhasherAppendedZeros(t
*testing
.T
) {
118 s
:= "hello" + strings
.Repeat("\x00", 256)
120 for i
:= 0; i
<= len(s
); i
++ {
126 // All 0-3 byte strings have distinct hashes.
127 func TestSmhasherSmallKeys(t
*testing
.T
) {
130 for i
:= 0; i
< 256; i
++ {
133 for j
:= 0; j
< 256; j
++ {
136 if !testing
.Short() {
137 for k
:= 0; k
< 256; k
++ {
147 // Different length strings of all zeros have distinct hashes.
148 func TestSmhasherZeros(t
*testing
.T
) {
155 for i
:= 0; i
<= N
; i
++ {
161 // Strings with up to two nonzero bytes all have distinct hashes.
162 func TestSmhasherTwoNonzero(t
*testing
.T
) {
163 if GOARCH
== "wasm" {
164 t
.Skip("Too slow on wasm")
167 t
.Skip("Skipping in short mode")
170 for n
:= 2; n
<= 16; n
++ {
175 func twoNonZero(h
*HashSet
, n
int) {
182 for i
:= 0; i
< n
; i
++ {
183 for x
:= 1; x
< 256; x
++ {
190 // two non-zero bytes
191 for i
:= 0; i
< n
; i
++ {
192 for x
:= 1; x
< 256; x
++ {
194 for j
:= i
+ 1; j
< n
; j
++ {
195 for y
:= 1; y
< 256; y
++ {
206 // Test strings with repeats, like "abcdabcdabcdabcd..."
207 func TestSmhasherCyclic(t
*testing
.T
) {
209 t
.Skip("Skipping in short mode")
211 r
:= rand
.New(rand
.NewSource(1234))
214 for n
:= 4; n
<= 12; n
++ {
216 b
:= make([]byte, REPEAT
*n
)
217 for i
:= 0; i
< N
; i
++ {
218 b
[0] = byte(i
* 79 % 97)
219 b
[1] = byte(i
* 43 % 137)
220 b
[2] = byte(i
* 151 % 197)
221 b
[3] = byte(i
* 199 % 251)
223 for j
:= n
; j
< n
*REPEAT
; j
++ {
232 // Test strings with only a few bits set
233 func TestSmhasherSparse(t
*testing
.T
) {
234 if GOARCH
== "wasm" {
235 t
.Skip("Too slow on wasm")
238 t
.Skip("Skipping in short mode")
249 func sparse(t
*testing
.T
, n
int, k
int) {
250 b
:= make([]byte, n
/8)
256 // set up to k bits at index i and greater
257 func setbits(h
*HashSet
, b
[]byte, i
int, k
int) {
262 for j
:= i
; j
< len(b
)*8; j
++ {
263 b
[j
/8] |
= byte(1 << uint(j
&7))
264 setbits(h
, b
, j
+1, k
-1)
265 b
[j
/8] &= byte(^(1 << uint(j
&7)))
269 // Test all possible combinations of n blocks from the set s.
270 // "permutation" is a bad name here, but it is what Smhasher uses.
271 func TestSmhasherPermutation(t
*testing
.T
) {
272 if GOARCH
== "wasm" {
273 t
.Skip("Too slow on wasm")
276 t
.Skip("Skipping in short mode")
278 permutation(t
, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
279 permutation(t
, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
280 permutation(t
, []uint32{0, 1}, 20)
281 permutation(t
, []uint32{0, 1 << 31}, 20)
282 permutation(t
, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
284 func permutation(t
*testing
.T
, s
[]uint32, n
int) {
285 b
:= make([]byte, n
*4)
290 func genPerm(h
*HashSet
, b
[]byte, s
[]uint32, n
int) {
295 for _
, v
:= range s
{
297 b
[n
+1] = byte(v
>> 8)
298 b
[n
+2] = byte(v
>> 16)
299 b
[n
+3] = byte(v
>> 24)
300 genPerm(h
, b
, s
, n
+4)
305 clear() // set bits all to 0
306 random(r
*rand
.Rand
) // set key to something random
307 bits() int // how many bits key has
308 flipBit(i
int) // flip bit i of the key
309 hash() uintptr // hash the key
310 name() string // for error reporting
313 type BytesKey
struct {
317 func (k
*BytesKey
) clear() {
322 func (k
*BytesKey
) random(r
*rand
.Rand
) {
325 func (k
*BytesKey
) bits() int {
328 func (k
*BytesKey
) flipBit(i
int) {
329 k
.b
[i
>>3] ^= byte(1 << uint(i
&7))
331 func (k
*BytesKey
) hash() uintptr {
332 return BytesHash(k
.b
, 0)
334 func (k
*BytesKey
) name() string {
335 return fmt
.Sprintf("bytes%d", len(k
.b
))
338 type Int32Key
struct {
342 func (k
*Int32Key
) clear() {
345 func (k
*Int32Key
) random(r
*rand
.Rand
) {
348 func (k
*Int32Key
) bits() int {
351 func (k
*Int32Key
) flipBit(i
int) {
354 func (k
*Int32Key
) hash() uintptr {
355 return Int32Hash(k
.i
, 0)
357 func (k
*Int32Key
) name() string {
361 type Int64Key
struct {
365 func (k
*Int64Key
) clear() {
368 func (k
*Int64Key
) random(r
*rand
.Rand
) {
369 k
.i
= uint64(r
.Uint32()) + uint64(r
.Uint32())<<32
371 func (k
*Int64Key
) bits() int {
374 func (k
*Int64Key
) flipBit(i
int) {
377 func (k
*Int64Key
) hash() uintptr {
378 return Int64Hash(k
.i
, 0)
380 func (k
*Int64Key
) name() string {
384 type EfaceKey
struct {
388 func (k
*EfaceKey
) clear() {
391 func (k
*EfaceKey
) random(r
*rand
.Rand
) {
392 k
.i
= uint64(r
.Int63())
394 func (k
*EfaceKey
) bits() int {
395 // use 64 bits. This tests inlined interfaces
396 // on 64-bit targets and indirect interfaces on
400 func (k
*EfaceKey
) flipBit(i
int) {
401 k
.i
= k
.i
.(uint64) ^ uint64(1)<<uint(i
)
403 func (k
*EfaceKey
) hash() uintptr {
404 return EfaceHash(k
.i
, 0)
406 func (k
*EfaceKey
) name() string {
410 type IfaceKey
struct {
417 func (x fInter
) F() {
420 func (k
*IfaceKey
) clear() {
423 func (k
*IfaceKey
) random(r
*rand
.Rand
) {
424 k
.i
= fInter(r
.Int63())
426 func (k
*IfaceKey
) bits() int {
427 // use 64 bits. This tests inlined interfaces
428 // on 64-bit targets and indirect interfaces on
432 func (k
*IfaceKey
) flipBit(i
int) {
433 k
.i
= k
.i
.(fInter
) ^ fInter(1)<<uint(i
)
435 func (k
*IfaceKey
) hash() uintptr {
436 return IfaceHash(k
.i
, 0)
438 func (k
*IfaceKey
) name() string {
442 // Flipping a single bit of a key should flip each output bit with 50% probability.
443 func TestSmhasherAvalanche(t
*testing
.T
) {
444 if GOARCH
== "wasm" {
445 t
.Skip("Too slow on wasm")
448 t
.Skip("Skipping in short mode")
450 avalancheTest1(t
, &BytesKey
{make([]byte, 2)})
451 avalancheTest1(t
, &BytesKey
{make([]byte, 4)})
452 avalancheTest1(t
, &BytesKey
{make([]byte, 8)})
453 avalancheTest1(t
, &BytesKey
{make([]byte, 16)})
454 avalancheTest1(t
, &BytesKey
{make([]byte, 32)})
455 avalancheTest1(t
, &BytesKey
{make([]byte, 200)})
456 avalancheTest1(t
, &Int32Key
{})
457 avalancheTest1(t
, &Int64Key
{})
458 avalancheTest1(t
, &EfaceKey
{})
459 avalancheTest1(t
, &IfaceKey
{})
461 func avalancheTest1(t
*testing
.T
, k Key
) {
463 r
:= rand
.New(rand
.NewSource(1234))
466 // grid[i][j] is a count of whether flipping
467 // input bit i affects output bit j.
468 grid
:= make([][hashSize
]int, n
)
470 for z
:= 0; z
< REP
; z
++ {
471 // pick a random key, hash it
475 // flip each bit, hash & compare the results
476 for i
:= 0; i
< n
; i
++ {
481 // record the effects of that bit flip
483 for j
:= 0; j
< hashSize
; j
++ {
490 // Each entry in the grid should be about REP/2.
491 // More precisely, we did N = k.bits() * hashSize experiments where
492 // each is the sum of REP coin flips. We want to find bounds on the
493 // sum of coin flips such that a truly random experiment would have
494 // all sums inside those bounds with 99% probability.
497 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
498 for c
= 0.0; math
.Pow(math
.Erf(c
/math
.Sqrt(2)), float64(N
)) < .9999; c
+= .1 {
500 c
*= 4.0 // allowed slack - we don't need to be perfectly random
502 stddev
:= .5 * math
.Sqrt(REP
)
503 low
:= int(mean
- c
*stddev
)
504 high
:= int(mean
+ c
*stddev
)
505 for i
:= 0; i
< n
; i
++ {
506 for j
:= 0; j
< hashSize
; j
++ {
508 if x
< low || x
> high
{
509 t
.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k
.name(), i
, j
, x
, REP
)
515 // All bit rotations of a set of distinct keys
516 func TestSmhasherWindowed(t
*testing
.T
) {
517 t
.Logf("32 bit keys")
518 windowed(t
, &Int32Key
{})
519 t
.Logf("64 bit keys")
520 windowed(t
, &Int64Key
{})
521 t
.Logf("string keys")
522 windowed(t
, &BytesKey
{make([]byte, 128)})
524 func windowed(t
*testing
.T
, k Key
) {
525 if GOARCH
== "wasm" {
526 t
.Skip("Too slow on wasm")
529 t
.Skip("Skipping in short mode")
533 for r
:= 0; r
< k
.bits(); r
++ {
535 for i
:= 0; i
< 1<<BITS
; i
++ {
537 for j
:= 0; j
< BITS
; j
++ {
538 if i
>>uint(j
)&1 != 0 {
539 k
.flipBit((j
+ r
) % k
.bits())
548 // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
549 func TestSmhasherText(t
*testing
.T
) {
551 t
.Skip("Skipping in short mode")
553 text(t
, "Foo", "Bar")
554 text(t
, "FooBar", "")
555 text(t
, "", "FooBar")
557 func text(t
*testing
.T
, prefix
, suffix
string) {
559 const S
= "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
561 b
:= make([]byte, len(prefix
)+N
+len(suffix
))
563 copy(b
[len(prefix
)+N
:], suffix
)
566 for i
:= 0; i
< L
; i
++ {
568 for j
:= 0; j
< L
; j
++ {
570 for k
:= 0; k
< L
; k
++ {
572 for x
:= 0; x
< L
; x
++ {
582 // Make sure different seed values generate different hashes.
583 func TestSmhasherSeed(t
*testing
.T
) {
587 for i
:= 0; i
< N
; i
++ {
588 h
.addS_seed(s
, uintptr(i
))
593 // size of the hash output (32 or 64 bits)
594 const hashSize
= 32 + int(^uintptr(0)>>63<<5)
596 func randBytes(r
*rand
.Rand
, b
[]byte) {
598 b
[i
] = byte(r
.Uint32())
602 func benchmarkHash(b
*testing
.B
, n
int) {
603 s
:= strings
.Repeat("A", n
)
605 for i
:= 0; i
< b
.N
; i
++ {
611 func BenchmarkHash5(b
*testing
.B
) { benchmarkHash(b
, 5) }
612 func BenchmarkHash16(b
*testing
.B
) { benchmarkHash(b
, 16) }
613 func BenchmarkHash64(b
*testing
.B
) { benchmarkHash(b
, 64) }
614 func BenchmarkHash1024(b
*testing
.B
) { benchmarkHash(b
, 1024) }
615 func BenchmarkHash65536(b
*testing
.B
) { benchmarkHash(b
, 65536) }
617 func TestArrayHash(t
*testing
.T
) {
618 if Compiler
== "gccgo" {
619 t
.Skip("does not work on gccgo without better escape analysis")
622 // Make sure that "" in arrays hash correctly. The hash
623 // should at least scramble the input seed so that, e.g.,
624 // {"","foo"} and {"foo",""} have different hashes.
626 // If the hash is bad, then all (8 choose 4) = 70 keys
627 // have the same hash. If so, we allocate 70/8 = 8
628 // overflow buckets. If the hash is good we don't
629 // normally allocate any overflow buckets, and the
630 // probability of even one or two overflows goes down rapidly.
631 // (There is always 1 allocation of the bucket array. The map
632 // header is allocated on the stack.)
634 // Make the key type at most 128 bytes. Otherwise,
635 // we get an allocation per key.
637 m
:= make(map[key
]bool, 70)
639 // fill m with keys that have 4 "foo"s and 4 ""s.
640 for i
:= 0; i
< 256; i
++ {
643 for j
:= uint(0); j
< 8; j
++ {
654 t
.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m
))
657 if n
:= testing
.AllocsPerRun(10, f
); n
> 6 {
658 t
.Errorf("too many allocs %f - hash not balanced", n
)
661 func TestStructHash(t
*testing
.T
) {
662 // See the comment in TestArrayHash.
665 a
, b
, c
, d
, e
, f
, g
, h
string
667 m
:= make(map[key
]bool, 70)
669 // fill m with keys that have 4 "foo"s and 4 ""s.
670 for i
:= 0; i
< 256; i
++ {
710 t
.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m
))
713 if n
:= testing
.AllocsPerRun(10, f
); n
> 6 {
714 t
.Errorf("too many allocs %f - hash not balanced", n
)
720 func BenchmarkAlignedLoad(b
*testing
.B
) {
722 p
:= unsafe
.Pointer(&buf
[0])
724 for i
:= 0; i
< b
.N
; i
++ {
725 s
+= ReadUnaligned64(p
)
730 func BenchmarkUnalignedLoad(b
*testing
.B
) {
732 p
:= unsafe
.Pointer(&buf
[1])
734 for i
:= 0; i
< b
.N
; i
++ {
735 s
+= ReadUnaligned64(p
)
740 func TestCollisions(t
*testing
.T
) {
742 t
.Skip("Skipping in short mode")
744 for i
:= 0; i
< 16; i
++ {
745 for j
:= 0; j
< 16; j
++ {
750 m
:= make(map[uint16]struct{}, 1<<16)
751 for n
:= 0; n
< 1<<16; n
++ {
754 m
[uint16(BytesHash(a
[:], 0))] = struct{}{}
757 t
.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i
, j
, len(m
))