1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 // Smhasher is a torture test for hash functions.
18 // https://code.google.com/p/smhasher/
19 // This code is a port of some of the Smhasher tests to Go.
21 // The current AES hash function passes Smhasher. Our fallback
22 // hash functions don't, so we only enable the difficult tests when
23 // we know the AES implementation is available.
26 // hash should not depend on values outside key.
27 // hash should not depend on alignment.
28 func TestSmhasherSanity(t
*testing
.T
) {
29 r
:= rand
.New(rand
.NewSource(1234))
34 for k
:= 0; k
< REP
; k
++ {
35 for n
:= 0; n
< KEYMAX
; n
++ {
36 for i
:= 0; i
< OFFMAX
; i
++ {
37 var b
[KEYMAX
+ OFFMAX
+ 2*PAD
]byte
38 var c
[KEYMAX
+ OFFMAX
+ 2*PAD
]byte
41 copy(c
[PAD
+i
:PAD
+i
+n
], b
[PAD
:PAD
+n
])
42 if BytesHash(b
[PAD
:PAD
+n
], 0) != BytesHash(c
[PAD
+i
:PAD
+i
+n
], 0) {
43 t
.Errorf("hash depends on bytes outside key")
51 m
map[uintptr]struct{} // set of hashes added
52 n
int // number of hashes added
55 func newHashSet() *HashSet
{
56 return &HashSet
{make(map[uintptr]struct{}), 0}
58 func (s
*HashSet
) add(h
uintptr) {
62 func (s
*HashSet
) addS(x
string) {
63 s
.add(StringHash(x
, 0))
65 func (s
*HashSet
) addB(x
[]byte) {
66 s
.add(BytesHash(x
, 0))
68 func (s
*HashSet
) addS_seed(x
string, seed
uintptr) {
69 s
.add(StringHash(x
, seed
))
71 func (s
*HashSet
) check(t
*testing
.T
) {
73 collisions
:= s
.n
- len(s
.m
)
74 //fmt.Printf("%d/%d\n", len(s.m), s.n)
75 pairs
:= int64(s
.n
) * int64(s
.n
-1) / 2
76 expected
:= float64(pairs
) / math
.Pow(2.0, float64(hashSize
))
77 stddev
:= math
.Sqrt(expected
)
78 if float64(collisions
) > expected
+SLOP
*(3*stddev
+1) {
79 t
.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions
, expected
, stddev
)
83 // a string plus adding zeros must make distinct hashes
84 func TestSmhasherAppendedZeros(t
*testing
.T
) {
85 s
:= "hello" + strings
.Repeat("\x00", 256)
87 for i
:= 0; i
<= len(s
); i
++ {
93 // All 0-3 byte strings have distinct hashes.
94 func TestSmhasherSmallKeys(t
*testing
.T
) {
97 for i
:= 0; i
< 256; i
++ {
100 for j
:= 0; j
< 256; j
++ {
103 if !testing
.Short() {
104 for k
:= 0; k
< 256; k
++ {
114 // Different length strings of all zeros have distinct hashes.
115 func TestSmhasherZeros(t
*testing
.T
) {
122 for i
:= 0; i
<= N
; i
++ {
128 // Strings with up to two nonzero bytes all have distinct hashes.
129 func TestSmhasherTwoNonzero(t
*testing
.T
) {
131 t
.Skip("Skipping in short mode")
134 for n
:= 2; n
<= 16; n
++ {
139 func twoNonZero(h
*HashSet
, n
int) {
146 for i
:= 0; i
< n
; i
++ {
147 for x
:= 1; x
< 256; x
++ {
154 // two non-zero bytes
155 for i
:= 0; i
< n
; i
++ {
156 for x
:= 1; x
< 256; x
++ {
158 for j
:= i
+ 1; j
< n
; j
++ {
159 for y
:= 1; y
< 256; y
++ {
170 // Test strings with repeats, like "abcdabcdabcdabcd..."
171 func TestSmhasherCyclic(t
*testing
.T
) {
173 t
.Skip("Skipping in short mode")
175 r
:= rand
.New(rand
.NewSource(1234))
178 for n
:= 4; n
<= 12; n
++ {
180 b
:= make([]byte, REPEAT
*n
)
181 for i
:= 0; i
< N
; i
++ {
182 b
[0] = byte(i
* 79 % 97)
183 b
[1] = byte(i
* 43 % 137)
184 b
[2] = byte(i
* 151 % 197)
185 b
[3] = byte(i
* 199 % 251)
187 for j
:= n
; j
< n
*REPEAT
; j
++ {
196 // Test strings with only a few bits set
197 func TestSmhasherSparse(t
*testing
.T
) {
199 t
.Skip("Skipping in short mode")
210 func sparse(t
*testing
.T
, n
int, k
int) {
211 b
:= make([]byte, n
/8)
217 // set up to k bits at index i and greater
218 func setbits(h
*HashSet
, b
[]byte, i
int, k
int) {
223 for j
:= i
; j
< len(b
)*8; j
++ {
224 b
[j
/8] |
= byte(1 << uint(j
&7))
225 setbits(h
, b
, j
+1, k
-1)
226 b
[j
/8] &= byte(^(1 << uint(j
&7)))
230 // Test all possible combinations of n blocks from the set s.
231 // "permutation" is a bad name here, but it is what Smhasher uses.
232 func TestSmhasherPermutation(t
*testing
.T
) {
234 t
.Skip("Skipping in short mode")
236 permutation(t
, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
237 permutation(t
, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
238 permutation(t
, []uint32{0, 1}, 20)
239 permutation(t
, []uint32{0, 1 << 31}, 20)
240 permutation(t
, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
242 func permutation(t
*testing
.T
, s
[]uint32, n
int) {
243 b
:= make([]byte, n
*4)
248 func genPerm(h
*HashSet
, b
[]byte, s
[]uint32, n
int) {
253 for _
, v
:= range s
{
255 b
[n
+1] = byte(v
>> 8)
256 b
[n
+2] = byte(v
>> 16)
257 b
[n
+3] = byte(v
>> 24)
258 genPerm(h
, b
, s
, n
+4)
263 clear() // set bits all to 0
264 random(r
*rand
.Rand
) // set key to something random
265 bits() int // how many bits key has
266 flipBit(i
int) // flip bit i of the key
267 hash() uintptr // hash the key
268 name() string // for error reporting
271 type BytesKey
struct {
275 func (k
*BytesKey
) clear() {
280 func (k
*BytesKey
) random(r
*rand
.Rand
) {
283 func (k
*BytesKey
) bits() int {
286 func (k
*BytesKey
) flipBit(i
int) {
287 k
.b
[i
>>3] ^= byte(1 << uint(i
&7))
289 func (k
*BytesKey
) hash() uintptr {
290 return BytesHash(k
.b
, 0)
292 func (k
*BytesKey
) name() string {
293 return fmt
.Sprintf("bytes%d", len(k
.b
))
296 type Int32Key
struct {
300 func (k
*Int32Key
) clear() {
303 func (k
*Int32Key
) random(r
*rand
.Rand
) {
306 func (k
*Int32Key
) bits() int {
309 func (k
*Int32Key
) flipBit(i
int) {
312 func (k
*Int32Key
) hash() uintptr {
313 return Int32Hash(k
.i
, 0)
315 func (k
*Int32Key
) name() string {
319 type Int64Key
struct {
323 func (k
*Int64Key
) clear() {
326 func (k
*Int64Key
) random(r
*rand
.Rand
) {
327 k
.i
= uint64(r
.Uint32()) + uint64(r
.Uint32())<<32
329 func (k
*Int64Key
) bits() int {
332 func (k
*Int64Key
) flipBit(i
int) {
335 func (k
*Int64Key
) hash() uintptr {
336 return Int64Hash(k
.i
, 0)
338 func (k
*Int64Key
) name() string {
342 type EfaceKey
struct {
346 func (k
*EfaceKey
) clear() {
349 func (k
*EfaceKey
) random(r
*rand
.Rand
) {
350 k
.i
= uint64(r
.Int63())
352 func (k
*EfaceKey
) bits() int {
353 // use 64 bits. This tests inlined interfaces
354 // on 64-bit targets and indirect interfaces on
358 func (k
*EfaceKey
) flipBit(i
int) {
359 k
.i
= k
.i
.(uint64) ^ uint64(1)<<uint(i
)
361 func (k
*EfaceKey
) hash() uintptr {
362 return EfaceHash(k
.i
, 0)
364 func (k
*EfaceKey
) name() string {
368 type IfaceKey
struct {
375 func (x fInter
) F() {
378 func (k
*IfaceKey
) clear() {
381 func (k
*IfaceKey
) random(r
*rand
.Rand
) {
382 k
.i
= fInter(r
.Int63())
384 func (k
*IfaceKey
) bits() int {
385 // use 64 bits. This tests inlined interfaces
386 // on 64-bit targets and indirect interfaces on
390 func (k
*IfaceKey
) flipBit(i
int) {
391 k
.i
= k
.i
.(fInter
) ^ fInter(1)<<uint(i
)
393 func (k
*IfaceKey
) hash() uintptr {
394 return IfaceHash(k
.i
, 0)
396 func (k
*IfaceKey
) name() string {
400 // Flipping a single bit of a key should flip each output bit with 50% probability.
401 func TestSmhasherAvalanche(t
*testing
.T
) {
403 t
.Skip("Skipping in short mode")
405 avalancheTest1(t
, &BytesKey
{make([]byte, 2)})
406 avalancheTest1(t
, &BytesKey
{make([]byte, 4)})
407 avalancheTest1(t
, &BytesKey
{make([]byte, 8)})
408 avalancheTest1(t
, &BytesKey
{make([]byte, 16)})
409 avalancheTest1(t
, &BytesKey
{make([]byte, 32)})
410 avalancheTest1(t
, &BytesKey
{make([]byte, 200)})
411 avalancheTest1(t
, &Int32Key
{})
412 avalancheTest1(t
, &Int64Key
{})
413 avalancheTest1(t
, &EfaceKey
{})
414 avalancheTest1(t
, &IfaceKey
{})
416 func avalancheTest1(t
*testing
.T
, k Key
) {
418 r
:= rand
.New(rand
.NewSource(1234))
421 // grid[i][j] is a count of whether flipping
422 // input bit i affects output bit j.
423 grid
:= make([][hashSize
]int, n
)
425 for z
:= 0; z
< REP
; z
++ {
426 // pick a random key, hash it
430 // flip each bit, hash & compare the results
431 for i
:= 0; i
< n
; i
++ {
436 // record the effects of that bit flip
438 for j
:= 0; j
< hashSize
; j
++ {
445 // Each entry in the grid should be about REP/2.
446 // More precisely, we did N = k.bits() * hashSize experiments where
447 // each is the sum of REP coin flips. We want to find bounds on the
448 // sum of coin flips such that a truly random experiment would have
449 // all sums inside those bounds with 99% probability.
452 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
453 for c
= 0.0; math
.Pow(math
.Erf(c
/math
.Sqrt(2)), float64(N
)) < .9999; c
+= .1 {
455 c
*= 4.0 // allowed slack - we don't need to be perfectly random
457 stddev
:= .5 * math
.Sqrt(REP
)
458 low
:= int(mean
- c
*stddev
)
459 high
:= int(mean
+ c
*stddev
)
460 for i
:= 0; i
< n
; i
++ {
461 for j
:= 0; j
< hashSize
; j
++ {
463 if x
< low || x
> high
{
464 t
.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k
.name(), i
, j
, x
, REP
)
470 // All bit rotations of a set of distinct keys
471 func TestSmhasherWindowed(t
*testing
.T
) {
472 windowed(t
, &Int32Key
{})
473 windowed(t
, &Int64Key
{})
474 windowed(t
, &BytesKey
{make([]byte, 128)})
476 func windowed(t
*testing
.T
, k Key
) {
478 t
.Skip("Skipping in short mode")
482 for r
:= 0; r
< k
.bits(); r
++ {
484 for i
:= 0; i
< 1<<BITS
; i
++ {
486 for j
:= 0; j
< BITS
; j
++ {
487 if i
>>uint(j
)&1 != 0 {
488 k
.flipBit((j
+ r
) % k
.bits())
497 // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
498 func TestSmhasherText(t
*testing
.T
) {
500 t
.Skip("Skipping in short mode")
502 text(t
, "Foo", "Bar")
503 text(t
, "FooBar", "")
504 text(t
, "", "FooBar")
506 func text(t
*testing
.T
, prefix
, suffix
string) {
508 const S
= "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
510 b
:= make([]byte, len(prefix
)+N
+len(suffix
))
512 copy(b
[len(prefix
)+N
:], suffix
)
515 for i
:= 0; i
< L
; i
++ {
517 for j
:= 0; j
< L
; j
++ {
519 for k
:= 0; k
< L
; k
++ {
521 for x
:= 0; x
< L
; x
++ {
531 // Make sure different seed values generate different hashes.
532 func TestSmhasherSeed(t
*testing
.T
) {
536 for i
:= 0; i
< N
; i
++ {
537 h
.addS_seed(s
, uintptr(i
))
542 // size of the hash output (32 or 64 bits)
543 const hashSize
= 32 + int(^uintptr(0)>>63<<5)
545 func randBytes(r
*rand
.Rand
, b
[]byte) {
547 b
[i
] = byte(r
.Uint32())
551 func benchmarkHash(b
*testing
.B
, n
int) {
552 s
:= strings
.Repeat("A", n
)
554 for i
:= 0; i
< b
.N
; i
++ {
560 func BenchmarkHash5(b
*testing
.B
) { benchmarkHash(b
, 5) }
561 func BenchmarkHash16(b
*testing
.B
) { benchmarkHash(b
, 16) }
562 func BenchmarkHash64(b
*testing
.B
) { benchmarkHash(b
, 64) }
563 func BenchmarkHash1024(b
*testing
.B
) { benchmarkHash(b
, 1024) }
564 func BenchmarkHash65536(b
*testing
.B
) { benchmarkHash(b
, 65536) }
566 func TestArrayHash(t
*testing
.T
) {
567 if Compiler
== "gccgo" {
568 t
.Skip("does not work on gccgo without better escape analysis")
571 // Make sure that "" in arrays hash correctly. The hash
572 // should at least scramble the input seed so that, e.g.,
573 // {"","foo"} and {"foo",""} have different hashes.
575 // If the hash is bad, then all (8 choose 4) = 70 keys
576 // have the same hash. If so, we allocate 70/8 = 8
577 // overflow buckets. If the hash is good we don't
578 // normally allocate any overflow buckets, and the
579 // probability of even one or two overflows goes down rapidly.
580 // (There is always 1 allocation of the bucket array. The map
581 // header is allocated on the stack.)
583 // Make the key type at most 128 bytes. Otherwise,
584 // we get an allocation per key.
586 m
:= make(map[key
]bool, 70)
588 // fill m with keys that have 4 "foo"s and 4 ""s.
589 for i
:= 0; i
< 256; i
++ {
592 for j
:= uint(0); j
< 8; j
++ {
603 t
.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m
))
606 if n
:= testing
.AllocsPerRun(10, f
); n
> 6 {
607 t
.Errorf("too many allocs %f - hash not balanced", n
)
610 func TestStructHash(t
*testing
.T
) {
611 // See the comment in TestArrayHash.
614 a
, b
, c
, d
, e
, f
, g
, h
string
616 m
:= make(map[key
]bool, 70)
618 // fill m with keys that have 4 "foo"s and 4 ""s.
619 for i
:= 0; i
< 256; i
++ {
659 t
.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m
))
662 if n
:= testing
.AllocsPerRun(10, f
); n
> 6 {
663 t
.Errorf("too many allocs %f - hash not balanced", n
)
669 func BenchmarkAlignedLoad(b
*testing
.B
) {
671 p
:= unsafe
.Pointer(&buf
[0])
673 for i
:= 0; i
< b
.N
; i
++ {
674 s
+= ReadUnaligned64(p
)
679 func BenchmarkUnalignedLoad(b
*testing
.B
) {
681 p
:= unsafe
.Pointer(&buf
[1])
683 for i
:= 0; i
< b
.N
; i
++ {
684 s
+= ReadUnaligned64(p
)
689 func TestCollisions(t
*testing
.T
) {
691 t
.Skip("Skipping in short mode")
693 for i
:= 0; i
< 16; i
++ {
694 for j
:= 0; j
< 16; j
++ {
699 m
:= make(map[uint16]struct{}, 1<<16)
700 for n
:= 0; n
< 1<<16; n
++ {
703 m
[uint16(BytesHash(a
[:], 0))] = struct{}{}
706 t
.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i
, j
, len(m
))