libgo: update to go1.9
[official-gcc.git] / libgo / go / runtime / hash_test.go
blob167c49eb5f59a286185c21485e804416942c0def
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package runtime_test
7 import (
8 "fmt"
9 "math"
10 "math/rand"
11 . "runtime"
12 "strings"
13 "testing"
14 "unsafe"
17 // Smhasher is a torture test for hash functions.
18 // https://code.google.com/p/smhasher/
19 // This code is a port of some of the Smhasher tests to Go.
21 // The current AES hash function passes Smhasher. Our fallback
22 // hash functions don't, so we only enable the difficult tests when
23 // we know the AES implementation is available.
25 // Sanity checks.
26 // hash should not depend on values outside key.
27 // hash should not depend on alignment.
28 func TestSmhasherSanity(t *testing.T) {
29 r := rand.New(rand.NewSource(1234))
30 const REP = 10
31 const KEYMAX = 128
32 const PAD = 16
33 const OFFMAX = 16
34 for k := 0; k < REP; k++ {
35 for n := 0; n < KEYMAX; n++ {
36 for i := 0; i < OFFMAX; i++ {
37 var b [KEYMAX + OFFMAX + 2*PAD]byte
38 var c [KEYMAX + OFFMAX + 2*PAD]byte
39 randBytes(r, b[:])
40 randBytes(r, c[:])
41 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
42 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
43 t.Errorf("hash depends on bytes outside key")
50 type HashSet struct {
51 m map[uintptr]struct{} // set of hashes added
52 n int // number of hashes added
55 func newHashSet() *HashSet {
56 return &HashSet{make(map[uintptr]struct{}), 0}
58 func (s *HashSet) add(h uintptr) {
59 s.m[h] = struct{}{}
60 s.n++
62 func (s *HashSet) addS(x string) {
63 s.add(StringHash(x, 0))
65 func (s *HashSet) addB(x []byte) {
66 s.add(BytesHash(x, 0))
68 func (s *HashSet) addS_seed(x string, seed uintptr) {
69 s.add(StringHash(x, seed))
71 func (s *HashSet) check(t *testing.T) {
72 const SLOP = 10.0
73 collisions := s.n - len(s.m)
74 //fmt.Printf("%d/%d\n", len(s.m), s.n)
75 pairs := int64(s.n) * int64(s.n-1) / 2
76 expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
77 stddev := math.Sqrt(expected)
78 if float64(collisions) > expected+SLOP*(3*stddev+1) {
79 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
83 // a string plus adding zeros must make distinct hashes
84 func TestSmhasherAppendedZeros(t *testing.T) {
85 s := "hello" + strings.Repeat("\x00", 256)
86 h := newHashSet()
87 for i := 0; i <= len(s); i++ {
88 h.addS(s[:i])
90 h.check(t)
93 // All 0-3 byte strings have distinct hashes.
94 func TestSmhasherSmallKeys(t *testing.T) {
95 h := newHashSet()
96 var b [3]byte
97 for i := 0; i < 256; i++ {
98 b[0] = byte(i)
99 h.addB(b[:1])
100 for j := 0; j < 256; j++ {
101 b[1] = byte(j)
102 h.addB(b[:2])
103 if !testing.Short() {
104 for k := 0; k < 256; k++ {
105 b[2] = byte(k)
106 h.addB(b[:3])
111 h.check(t)
114 // Different length strings of all zeros have distinct hashes.
115 func TestSmhasherZeros(t *testing.T) {
116 N := 256 * 1024
117 if testing.Short() {
118 N = 1024
120 h := newHashSet()
121 b := make([]byte, N)
122 for i := 0; i <= N; i++ {
123 h.addB(b[:i])
125 h.check(t)
128 // Strings with up to two nonzero bytes all have distinct hashes.
129 func TestSmhasherTwoNonzero(t *testing.T) {
130 if testing.Short() {
131 t.Skip("Skipping in short mode")
133 h := newHashSet()
134 for n := 2; n <= 16; n++ {
135 twoNonZero(h, n)
137 h.check(t)
139 func twoNonZero(h *HashSet, n int) {
140 b := make([]byte, n)
142 // all zero
143 h.addB(b[:])
145 // one non-zero byte
146 for i := 0; i < n; i++ {
147 for x := 1; x < 256; x++ {
148 b[i] = byte(x)
149 h.addB(b[:])
150 b[i] = 0
154 // two non-zero bytes
155 for i := 0; i < n; i++ {
156 for x := 1; x < 256; x++ {
157 b[i] = byte(x)
158 for j := i + 1; j < n; j++ {
159 for y := 1; y < 256; y++ {
160 b[j] = byte(y)
161 h.addB(b[:])
162 b[j] = 0
165 b[i] = 0
170 // Test strings with repeats, like "abcdabcdabcdabcd..."
171 func TestSmhasherCyclic(t *testing.T) {
172 if testing.Short() {
173 t.Skip("Skipping in short mode")
175 r := rand.New(rand.NewSource(1234))
176 const REPEAT = 8
177 const N = 1000000
178 for n := 4; n <= 12; n++ {
179 h := newHashSet()
180 b := make([]byte, REPEAT*n)
181 for i := 0; i < N; i++ {
182 b[0] = byte(i * 79 % 97)
183 b[1] = byte(i * 43 % 137)
184 b[2] = byte(i * 151 % 197)
185 b[3] = byte(i * 199 % 251)
186 randBytes(r, b[4:n])
187 for j := n; j < n*REPEAT; j++ {
188 b[j] = b[j-n]
190 h.addB(b)
192 h.check(t)
196 // Test strings with only a few bits set
197 func TestSmhasherSparse(t *testing.T) {
198 if testing.Short() {
199 t.Skip("Skipping in short mode")
201 sparse(t, 32, 6)
202 sparse(t, 40, 6)
203 sparse(t, 48, 5)
204 sparse(t, 56, 5)
205 sparse(t, 64, 5)
206 sparse(t, 96, 4)
207 sparse(t, 256, 3)
208 sparse(t, 2048, 2)
210 func sparse(t *testing.T, n int, k int) {
211 b := make([]byte, n/8)
212 h := newHashSet()
213 setbits(h, b, 0, k)
214 h.check(t)
217 // set up to k bits at index i and greater
218 func setbits(h *HashSet, b []byte, i int, k int) {
219 h.addB(b)
220 if k == 0 {
221 return
223 for j := i; j < len(b)*8; j++ {
224 b[j/8] |= byte(1 << uint(j&7))
225 setbits(h, b, j+1, k-1)
226 b[j/8] &= byte(^(1 << uint(j&7)))
230 // Test all possible combinations of n blocks from the set s.
231 // "permutation" is a bad name here, but it is what Smhasher uses.
232 func TestSmhasherPermutation(t *testing.T) {
233 if testing.Short() {
234 t.Skip("Skipping in short mode")
236 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
237 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
238 permutation(t, []uint32{0, 1}, 20)
239 permutation(t, []uint32{0, 1 << 31}, 20)
240 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
242 func permutation(t *testing.T, s []uint32, n int) {
243 b := make([]byte, n*4)
244 h := newHashSet()
245 genPerm(h, b, s, 0)
246 h.check(t)
248 func genPerm(h *HashSet, b []byte, s []uint32, n int) {
249 h.addB(b[:n])
250 if n == len(b) {
251 return
253 for _, v := range s {
254 b[n] = byte(v)
255 b[n+1] = byte(v >> 8)
256 b[n+2] = byte(v >> 16)
257 b[n+3] = byte(v >> 24)
258 genPerm(h, b, s, n+4)
262 type Key interface {
263 clear() // set bits all to 0
264 random(r *rand.Rand) // set key to something random
265 bits() int // how many bits key has
266 flipBit(i int) // flip bit i of the key
267 hash() uintptr // hash the key
268 name() string // for error reporting
271 type BytesKey struct {
272 b []byte
275 func (k *BytesKey) clear() {
276 for i := range k.b {
277 k.b[i] = 0
280 func (k *BytesKey) random(r *rand.Rand) {
281 randBytes(r, k.b)
283 func (k *BytesKey) bits() int {
284 return len(k.b) * 8
286 func (k *BytesKey) flipBit(i int) {
287 k.b[i>>3] ^= byte(1 << uint(i&7))
289 func (k *BytesKey) hash() uintptr {
290 return BytesHash(k.b, 0)
292 func (k *BytesKey) name() string {
293 return fmt.Sprintf("bytes%d", len(k.b))
296 type Int32Key struct {
297 i uint32
300 func (k *Int32Key) clear() {
301 k.i = 0
303 func (k *Int32Key) random(r *rand.Rand) {
304 k.i = r.Uint32()
306 func (k *Int32Key) bits() int {
307 return 32
309 func (k *Int32Key) flipBit(i int) {
310 k.i ^= 1 << uint(i)
312 func (k *Int32Key) hash() uintptr {
313 return Int32Hash(k.i, 0)
315 func (k *Int32Key) name() string {
316 return "int32"
319 type Int64Key struct {
320 i uint64
323 func (k *Int64Key) clear() {
324 k.i = 0
326 func (k *Int64Key) random(r *rand.Rand) {
327 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
329 func (k *Int64Key) bits() int {
330 return 64
332 func (k *Int64Key) flipBit(i int) {
333 k.i ^= 1 << uint(i)
335 func (k *Int64Key) hash() uintptr {
336 return Int64Hash(k.i, 0)
338 func (k *Int64Key) name() string {
339 return "int64"
342 type EfaceKey struct {
343 i interface{}
346 func (k *EfaceKey) clear() {
347 k.i = nil
349 func (k *EfaceKey) random(r *rand.Rand) {
350 k.i = uint64(r.Int63())
352 func (k *EfaceKey) bits() int {
353 // use 64 bits. This tests inlined interfaces
354 // on 64-bit targets and indirect interfaces on
355 // 32-bit targets.
356 return 64
358 func (k *EfaceKey) flipBit(i int) {
359 k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
361 func (k *EfaceKey) hash() uintptr {
362 return EfaceHash(k.i, 0)
364 func (k *EfaceKey) name() string {
365 return "Eface"
368 type IfaceKey struct {
369 i interface {
373 type fInter uint64
375 func (x fInter) F() {
378 func (k *IfaceKey) clear() {
379 k.i = nil
381 func (k *IfaceKey) random(r *rand.Rand) {
382 k.i = fInter(r.Int63())
384 func (k *IfaceKey) bits() int {
385 // use 64 bits. This tests inlined interfaces
386 // on 64-bit targets and indirect interfaces on
387 // 32-bit targets.
388 return 64
390 func (k *IfaceKey) flipBit(i int) {
391 k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
393 func (k *IfaceKey) hash() uintptr {
394 return IfaceHash(k.i, 0)
396 func (k *IfaceKey) name() string {
397 return "Iface"
400 // Flipping a single bit of a key should flip each output bit with 50% probability.
401 func TestSmhasherAvalanche(t *testing.T) {
402 if testing.Short() {
403 t.Skip("Skipping in short mode")
405 avalancheTest1(t, &BytesKey{make([]byte, 2)})
406 avalancheTest1(t, &BytesKey{make([]byte, 4)})
407 avalancheTest1(t, &BytesKey{make([]byte, 8)})
408 avalancheTest1(t, &BytesKey{make([]byte, 16)})
409 avalancheTest1(t, &BytesKey{make([]byte, 32)})
410 avalancheTest1(t, &BytesKey{make([]byte, 200)})
411 avalancheTest1(t, &Int32Key{})
412 avalancheTest1(t, &Int64Key{})
413 avalancheTest1(t, &EfaceKey{})
414 avalancheTest1(t, &IfaceKey{})
416 func avalancheTest1(t *testing.T, k Key) {
417 const REP = 100000
418 r := rand.New(rand.NewSource(1234))
419 n := k.bits()
421 // grid[i][j] is a count of whether flipping
422 // input bit i affects output bit j.
423 grid := make([][hashSize]int, n)
425 for z := 0; z < REP; z++ {
426 // pick a random key, hash it
427 k.random(r)
428 h := k.hash()
430 // flip each bit, hash & compare the results
431 for i := 0; i < n; i++ {
432 k.flipBit(i)
433 d := h ^ k.hash()
434 k.flipBit(i)
436 // record the effects of that bit flip
437 g := &grid[i]
438 for j := 0; j < hashSize; j++ {
439 g[j] += int(d & 1)
440 d >>= 1
445 // Each entry in the grid should be about REP/2.
446 // More precisely, we did N = k.bits() * hashSize experiments where
447 // each is the sum of REP coin flips. We want to find bounds on the
448 // sum of coin flips such that a truly random experiment would have
449 // all sums inside those bounds with 99% probability.
450 N := n * hashSize
451 var c float64
452 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
453 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
455 c *= 4.0 // allowed slack - we don't need to be perfectly random
456 mean := .5 * REP
457 stddev := .5 * math.Sqrt(REP)
458 low := int(mean - c*stddev)
459 high := int(mean + c*stddev)
460 for i := 0; i < n; i++ {
461 for j := 0; j < hashSize; j++ {
462 x := grid[i][j]
463 if x < low || x > high {
464 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
470 // All bit rotations of a set of distinct keys
471 func TestSmhasherWindowed(t *testing.T) {
472 windowed(t, &Int32Key{})
473 windowed(t, &Int64Key{})
474 windowed(t, &BytesKey{make([]byte, 128)})
476 func windowed(t *testing.T, k Key) {
477 if testing.Short() {
478 t.Skip("Skipping in short mode")
480 const BITS = 16
482 for r := 0; r < k.bits(); r++ {
483 h := newHashSet()
484 for i := 0; i < 1<<BITS; i++ {
485 k.clear()
486 for j := 0; j < BITS; j++ {
487 if i>>uint(j)&1 != 0 {
488 k.flipBit((j + r) % k.bits())
491 h.add(k.hash())
493 h.check(t)
497 // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
498 func TestSmhasherText(t *testing.T) {
499 if testing.Short() {
500 t.Skip("Skipping in short mode")
502 text(t, "Foo", "Bar")
503 text(t, "FooBar", "")
504 text(t, "", "FooBar")
506 func text(t *testing.T, prefix, suffix string) {
507 const N = 4
508 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
509 const L = len(S)
510 b := make([]byte, len(prefix)+N+len(suffix))
511 copy(b, prefix)
512 copy(b[len(prefix)+N:], suffix)
513 h := newHashSet()
514 c := b[len(prefix):]
515 for i := 0; i < L; i++ {
516 c[0] = S[i]
517 for j := 0; j < L; j++ {
518 c[1] = S[j]
519 for k := 0; k < L; k++ {
520 c[2] = S[k]
521 for x := 0; x < L; x++ {
522 c[3] = S[x]
523 h.addB(b)
528 h.check(t)
531 // Make sure different seed values generate different hashes.
532 func TestSmhasherSeed(t *testing.T) {
533 h := newHashSet()
534 const N = 100000
535 s := "hello"
536 for i := 0; i < N; i++ {
537 h.addS_seed(s, uintptr(i))
539 h.check(t)
542 // size of the hash output (32 or 64 bits)
543 const hashSize = 32 + int(^uintptr(0)>>63<<5)
545 func randBytes(r *rand.Rand, b []byte) {
546 for i := range b {
547 b[i] = byte(r.Uint32())
551 func benchmarkHash(b *testing.B, n int) {
552 s := strings.Repeat("A", n)
554 for i := 0; i < b.N; i++ {
555 StringHash(s, 0)
557 b.SetBytes(int64(n))
560 func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) }
561 func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) }
562 func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) }
563 func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) }
564 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
566 func TestArrayHash(t *testing.T) {
567 if Compiler == "gccgo" {
568 t.Skip("does not work on gccgo without better escape analysis")
571 // Make sure that "" in arrays hash correctly. The hash
572 // should at least scramble the input seed so that, e.g.,
573 // {"","foo"} and {"foo",""} have different hashes.
575 // If the hash is bad, then all (8 choose 4) = 70 keys
576 // have the same hash. If so, we allocate 70/8 = 8
577 // overflow buckets. If the hash is good we don't
578 // normally allocate any overflow buckets, and the
579 // probability of even one or two overflows goes down rapidly.
580 // (There is always 1 allocation of the bucket array. The map
581 // header is allocated on the stack.)
582 f := func() {
583 // Make the key type at most 128 bytes. Otherwise,
584 // we get an allocation per key.
585 type key [8]string
586 m := make(map[key]bool, 70)
588 // fill m with keys that have 4 "foo"s and 4 ""s.
589 for i := 0; i < 256; i++ {
590 var k key
591 cnt := 0
592 for j := uint(0); j < 8; j++ {
593 if i>>j&1 != 0 {
594 k[j] = "foo"
595 cnt++
598 if cnt == 4 {
599 m[k] = true
602 if len(m) != 70 {
603 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
606 if n := testing.AllocsPerRun(10, f); n > 6 {
607 t.Errorf("too many allocs %f - hash not balanced", n)
610 func TestStructHash(t *testing.T) {
611 // See the comment in TestArrayHash.
612 f := func() {
613 type key struct {
614 a, b, c, d, e, f, g, h string
616 m := make(map[key]bool, 70)
618 // fill m with keys that have 4 "foo"s and 4 ""s.
619 for i := 0; i < 256; i++ {
620 var k key
621 cnt := 0
622 if i&1 != 0 {
623 k.a = "foo"
624 cnt++
626 if i&2 != 0 {
627 k.b = "foo"
628 cnt++
630 if i&4 != 0 {
631 k.c = "foo"
632 cnt++
634 if i&8 != 0 {
635 k.d = "foo"
636 cnt++
638 if i&16 != 0 {
639 k.e = "foo"
640 cnt++
642 if i&32 != 0 {
643 k.f = "foo"
644 cnt++
646 if i&64 != 0 {
647 k.g = "foo"
648 cnt++
650 if i&128 != 0 {
651 k.h = "foo"
652 cnt++
654 if cnt == 4 {
655 m[k] = true
658 if len(m) != 70 {
659 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
662 if n := testing.AllocsPerRun(10, f); n > 6 {
663 t.Errorf("too many allocs %f - hash not balanced", n)
667 var sink uint64
669 func BenchmarkAlignedLoad(b *testing.B) {
670 var buf [16]byte
671 p := unsafe.Pointer(&buf[0])
672 var s uint64
673 for i := 0; i < b.N; i++ {
674 s += ReadUnaligned64(p)
676 sink = s
679 func BenchmarkUnalignedLoad(b *testing.B) {
680 var buf [16]byte
681 p := unsafe.Pointer(&buf[1])
682 var s uint64
683 for i := 0; i < b.N; i++ {
684 s += ReadUnaligned64(p)
686 sink = s
689 func TestCollisions(t *testing.T) {
690 if testing.Short() {
691 t.Skip("Skipping in short mode")
693 for i := 0; i < 16; i++ {
694 for j := 0; j < 16; j++ {
695 if j == i {
696 continue
698 var a [16]byte
699 m := make(map[uint16]struct{}, 1<<16)
700 for n := 0; n < 1<<16; n++ {
701 a[i] = byte(n)
702 a[j] = byte(n >> 8)
703 m[uint16(BytesHash(a[:], 0))] = struct{}{}
705 if len(m) <= 1<<15 {
706 t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))