libstdc++: Avoid -Wmaybe-uninitialized warnings in text_encoding.cc
[official-gcc.git] / libgo / go / runtime / hash_test.go
blobe8e8d50bb6c7ea6037b380bb68720a207c79fcea
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package runtime_test
7 import (
8 "fmt"
9 "math"
10 "math/rand"
11 . "runtime"
12 "strings"
13 "testing"
14 "unsafe"
17 func TestMemHash32Equality(t *testing.T) {
18 if *UseAeshash {
19 t.Skip("skipping since AES hash implementation is used")
21 var b [4]byte
22 r := rand.New(rand.NewSource(1234))
23 seed := uintptr(r.Uint64())
24 for i := 0; i < 100; i++ {
25 randBytes(r, b[:])
26 got := MemHash32(unsafe.Pointer(&b), seed)
27 want := MemHash(unsafe.Pointer(&b), seed, 4)
28 if got != want {
29 t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
34 func TestMemHash64Equality(t *testing.T) {
35 if *UseAeshash {
36 t.Skip("skipping since AES hash implementation is used")
38 var b [8]byte
39 r := rand.New(rand.NewSource(1234))
40 seed := uintptr(r.Uint64())
41 for i := 0; i < 100; i++ {
42 randBytes(r, b[:])
43 got := MemHash64(unsafe.Pointer(&b), seed)
44 want := MemHash(unsafe.Pointer(&b), seed, 8)
45 if got != want {
46 t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
51 // Smhasher is a torture test for hash functions.
52 // https://code.google.com/p/smhasher/
53 // This code is a port of some of the Smhasher tests to Go.
55 // The current AES hash function passes Smhasher. Our fallback
56 // hash functions don't, so we only enable the difficult tests when
57 // we know the AES implementation is available.
// Sanity checks.
// hash should not depend on values outside key.
// hash should not depend on alignment.
func TestSmhasherSanity(t *testing.T) {
	r := rand.New(rand.NewSource(1234))
	const REP = 10     // number of random trials
	const KEYMAX = 128 // maximum key length tested
	const PAD = 16     // random padding kept on each side of the key
	const OFFMAX = 16  // maximum alignment offset tested
	for k := 0; k < REP; k++ {
		for n := 0; n < KEYMAX; n++ {
			for i := 0; i < OFFMAX; i++ {
				// b holds the key at offset PAD; c holds a copy of the
				// same key at offset PAD+i. Everything outside the key
				// is independent random padding.
				var b [KEYMAX + OFFMAX + 2*PAD]byte
				var c [KEYMAX + OFFMAX + 2*PAD]byte
				randBytes(r, b[:])
				randBytes(r, c[:])
				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
				// Identical keys must hash identically regardless of
				// surrounding bytes or the key's alignment in memory.
				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
					t.Errorf("hash depends on bytes outside key")
				}
			}
		}
	}
}
// HashSet records hash values and how many were added, so that the
// number of collisions (duplicates) can be checked afterwards.
type HashSet struct {
	m map[uintptr]struct{} // set of hashes added
	n int                  // number of hashes added
}

// newHashSet returns an empty HashSet.
func newHashSet() *HashSet {
	return &HashSet{make(map[uintptr]struct{}), 0}
}

// add records one hash value.
func (s *HashSet) add(h uintptr) {
	s.m[h] = struct{}{}
	s.n++
}

// addS hashes the string x with seed 0 and records the result.
func (s *HashSet) addS(x string) {
	s.add(StringHash(x, 0))
}

// addB hashes the byte slice x with seed 0 and records the result.
func (s *HashSet) addB(x []byte) {
	s.add(BytesHash(x, 0))
}

// addS_seed hashes the string x with the given seed and records the result.
func (s *HashSet) addS_seed(x string, seed uintptr) {
	s.add(StringHash(x, seed))
}

// check fails the test if the observed number of collisions is far
// beyond what a uniformly random hash function would produce.
func (s *HashSet) check(t *testing.T) {
	const SLOP = 50.0
	collisions := s.n - len(s.m)
	// Birthday bound: with n values there are n*(n-1)/2 pairs, each
	// colliding with probability 2^-hashSize; the collision count is
	// approximately Poisson, so stddev = sqrt(mean).
	pairs := int64(s.n) * int64(s.n-1) / 2
	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
	stddev := math.Sqrt(expected)
	if float64(collisions) > expected+SLOP*(3*stddev+1) {
		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f threshold=%f", collisions, expected, stddev, expected+SLOP*(3*stddev+1))
	}
}
116 // a string plus adding zeros must make distinct hashes
117 func TestSmhasherAppendedZeros(t *testing.T) {
118 s := "hello" + strings.Repeat("\x00", 256)
119 h := newHashSet()
120 for i := 0; i <= len(s); i++ {
121 h.addS(s[:i])
123 h.check(t)
// All 0-3 byte strings have distinct hashes.
func TestSmhasherSmallKeys(t *testing.T) {
	h := newHashSet()
	var b [3]byte
	for i := 0; i < 256; i++ {
		b[0] = byte(i)
		h.addB(b[:1]) // every 1-byte key
		for j := 0; j < 256; j++ {
			b[1] = byte(j)
			h.addB(b[:2]) // every 2-byte key
			if !testing.Short() {
				// The full 3-byte sweep is 16M hashes; only run it
				// outside short mode.
				for k := 0; k < 256; k++ {
					b[2] = byte(k)
					h.addB(b[:3]) // every 3-byte key
				}
			}
		}
	}
	h.check(t)
}
147 // Different length strings of all zeros have distinct hashes.
148 func TestSmhasherZeros(t *testing.T) {
149 N := 256 * 1024
150 if testing.Short() {
151 N = 1024
153 h := newHashSet()
154 b := make([]byte, N)
155 for i := 0; i <= N; i++ {
156 h.addB(b[:i])
158 h.check(t)
161 // Strings with up to two nonzero bytes all have distinct hashes.
162 func TestSmhasherTwoNonzero(t *testing.T) {
163 if GOARCH == "wasm" {
164 t.Skip("Too slow on wasm")
166 if testing.Short() {
167 t.Skip("Skipping in short mode")
169 h := newHashSet()
170 for n := 2; n <= 16; n++ {
171 twoNonZero(h, n)
173 h.check(t)
// twoNonZero adds to h the hashes of every n-byte string containing
// at most two nonzero bytes.
func twoNonZero(h *HashSet, n int) {
	b := make([]byte, n)

	// all zero
	h.addB(b)

	// one non-zero byte
	for i := 0; i < n; i++ {
		for x := 1; x < 256; x++ {
			b[i] = byte(x)
			h.addB(b)
			b[i] = 0 // restore so b is all-zero again
		}
	}

	// two non-zero bytes
	for i := 0; i < n; i++ {
		for x := 1; x < 256; x++ {
			b[i] = byte(x)
			// j > i avoids counting each unordered pair twice.
			for j := i + 1; j < n; j++ {
				for y := 1; y < 256; y++ {
					b[j] = byte(y)
					h.addB(b)
					b[j] = 0 // restore second byte
				}
			}
			b[i] = 0 // restore first byte
		}
	}
}
// Test strings with repeats, like "abcdabcdabcdabcd..."
func TestSmhasherCyclic(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping in short mode")
	}
	r := rand.New(rand.NewSource(1234))
	const REPEAT = 8  // number of copies of the repeating unit
	const N = 1000000 // keys tested per unit length
	for n := 4; n <= 12; n++ { // n = length of the repeating unit
		h := newHashSet()
		b := make([]byte, REPEAT*n)
		for i := 0; i < N; i++ {
			// The first four bytes cycle deterministically with
			// pairwise-distinct periods; the rest of the unit is random.
			b[0] = byte(i * 79 % 97)
			b[1] = byte(i * 43 % 137)
			b[2] = byte(i * 151 % 197)
			b[3] = byte(i * 199 % 251)
			randBytes(r, b[4:n])
			// Replicate the n-byte unit across the whole buffer.
			for j := n; j < n*REPEAT; j++ {
				b[j] = b[j-n]
			}
			h.addB(b)
		}
		h.check(t)
	}
}
232 // Test strings with only a few bits set
233 func TestSmhasherSparse(t *testing.T) {
234 if GOARCH == "wasm" {
235 t.Skip("Too slow on wasm")
237 if testing.Short() {
238 t.Skip("Skipping in short mode")
240 sparse(t, 32, 6)
241 sparse(t, 40, 6)
242 sparse(t, 48, 5)
243 sparse(t, 56, 5)
244 sparse(t, 64, 5)
245 sparse(t, 96, 4)
246 sparse(t, 256, 3)
247 sparse(t, 2048, 2)
249 func sparse(t *testing.T, n int, k int) {
250 b := make([]byte, n/8)
251 h := newHashSet()
252 setbits(h, b, 0, k)
253 h.check(t)
// set up to k bits at index i and greater
func setbits(h *HashSet, b []byte, i int, k int) {
	// Record the key in its current state, then recursively try
	// setting each remaining bit position (at most k more).
	h.addB(b)
	if k == 0 {
		return
	}
	for j := i; j < len(b)*8; j++ {
		b[j/8] |= byte(1 << uint(j&7))
		setbits(h, b, j+1, k-1)
		b[j/8] &= byte(^(1 << uint(j&7))) // undo before trying the next bit
	}
}
// Test all possible combinations of n blocks from the set s.
// "permutation" is a bad name here, but it is what Smhasher uses.
func TestSmhasherPermutation(t *testing.T) {
	if GOARCH == "wasm" {
		t.Skip("Too slow on wasm")
	}
	if testing.Short() {
		t.Skip("Skipping in short mode")
	}
	// Small values, high-bit values, two-element sets with long keys,
	// and a mixed set — all stress different parts of the mixing.
	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
	permutation(t, []uint32{0, 1}, 20)
	permutation(t, []uint32{0, 1 << 31}, 20)
	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
}
284 func permutation(t *testing.T, s []uint32, n int) {
285 b := make([]byte, n*4)
286 h := newHashSet()
287 genPerm(h, b, s, 0)
288 h.check(t)
// genPerm recursively fills b, four bytes (one little-endian uint32)
// at a time, with every combination of values from s, adding the hash
// of each prefix to h along the way.
func genPerm(h *HashSet, b []byte, s []uint32, n int) {
	h.addB(b[:n])
	if n == len(b) {
		return
	}
	for _, v := range s {
		// Store v little-endian at offset n.
		b[n] = byte(v)
		b[n+1] = byte(v >> 8)
		b[n+2] = byte(v >> 16)
		b[n+3] = byte(v >> 24)
		genPerm(h, b, s, n+4)
	}
}
// Key is an abstract hashable key used by the avalanche and windowed
// tests below. Implementations wrap bytes, fixed-width integers, and
// interface values.
type Key interface {
	clear()              // set bits all to 0
	random(r *rand.Rand) // set key to something random
	bits() int           // how many bits key has
	flipBit(i int)       // flip bit i of the key
	hash() uintptr       // hash the key
	name() string        // for error reporting
}
// BytesKey is a Key backed by a byte slice.
type BytesKey struct {
	b []byte
}

func (k *BytesKey) clear() {
	for i := range k.b {
		k.b[i] = 0
	}
}
func (k *BytesKey) random(r *rand.Rand) {
	randBytes(r, k.b)
}
func (k *BytesKey) bits() int {
	return len(k.b) * 8
}
func (k *BytesKey) flipBit(i int) {
	// Bit i lives in byte i/8 at position i mod 8.
	k.b[i>>3] ^= byte(1 << uint(i&7))
}
func (k *BytesKey) hash() uintptr {
	return BytesHash(k.b, 0)
}
func (k *BytesKey) name() string {
	return fmt.Sprintf("bytes%d", len(k.b))
}
// Int32Key is a Key backed by a uint32.
type Int32Key struct {
	i uint32
}

func (k *Int32Key) clear() {
	k.i = 0
}
func (k *Int32Key) random(r *rand.Rand) {
	k.i = r.Uint32()
}
func (k *Int32Key) bits() int {
	return 32
}
func (k *Int32Key) flipBit(i int) {
	k.i ^= 1 << uint(i)
}
func (k *Int32Key) hash() uintptr {
	return Int32Hash(k.i, 0)
}
func (k *Int32Key) name() string {
	return "int32"
}
// Int64Key is a Key backed by a uint64.
type Int64Key struct {
	i uint64
}

func (k *Int64Key) clear() {
	k.i = 0
}
func (k *Int64Key) random(r *rand.Rand) {
	// Build 64 random bits from two 32-bit draws.
	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
}
func (k *Int64Key) bits() int {
	return 64
}
func (k *Int64Key) flipBit(i int) {
	k.i ^= 1 << uint(i)
}
func (k *Int64Key) hash() uintptr {
	return Int64Hash(k.i, 0)
}
func (k *Int64Key) name() string {
	return "int64"
}
// EfaceKey is a Key stored in an empty interface (any).
type EfaceKey struct {
	i any
}

func (k *EfaceKey) clear() {
	k.i = nil
}
func (k *EfaceKey) random(r *rand.Rand) {
	k.i = uint64(r.Int63())
}
func (k *EfaceKey) bits() int {
	// use 64 bits. This tests inlined interfaces
	// on 64-bit targets and indirect interfaces on
	// 32-bit targets.
	return 64
}
func (k *EfaceKey) flipBit(i int) {
	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
}
func (k *EfaceKey) hash() uintptr {
	return EfaceHash(k.i, 0)
}
func (k *EfaceKey) name() string {
	return "Eface"
}
// IfaceKey is a Key stored in a non-empty interface.
type IfaceKey struct {
	i interface {
		F()
	}
}

// fInter is the concrete type stored in IfaceKey; F gives it the
// method set the interface requires.
type fInter uint64

func (x fInter) F() {
}

func (k *IfaceKey) clear() {
	k.i = nil
}
func (k *IfaceKey) random(r *rand.Rand) {
	k.i = fInter(r.Int63())
}
func (k *IfaceKey) bits() int {
	// use 64 bits. This tests inlined interfaces
	// on 64-bit targets and indirect interfaces on
	// 32-bit targets.
	return 64
}
func (k *IfaceKey) flipBit(i int) {
	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
}
func (k *IfaceKey) hash() uintptr {
	return IfaceHash(k.i, 0)
}
func (k *IfaceKey) name() string {
	return "Iface"
}
442 // Flipping a single bit of a key should flip each output bit with 50% probability.
443 func TestSmhasherAvalanche(t *testing.T) {
444 if GOARCH == "wasm" {
445 t.Skip("Too slow on wasm")
447 if testing.Short() {
448 t.Skip("Skipping in short mode")
450 avalancheTest1(t, &BytesKey{make([]byte, 2)})
451 avalancheTest1(t, &BytesKey{make([]byte, 4)})
452 avalancheTest1(t, &BytesKey{make([]byte, 8)})
453 avalancheTest1(t, &BytesKey{make([]byte, 16)})
454 avalancheTest1(t, &BytesKey{make([]byte, 32)})
455 avalancheTest1(t, &BytesKey{make([]byte, 200)})
456 avalancheTest1(t, &Int32Key{})
457 avalancheTest1(t, &Int64Key{})
458 avalancheTest1(t, &EfaceKey{})
459 avalancheTest1(t, &IfaceKey{})
// avalancheTest1 flips each input bit of k over many random keys and
// verifies that every output bit of the hash changes roughly half of
// the time.
func avalancheTest1(t *testing.T, k Key) {
	const REP = 100000
	r := rand.New(rand.NewSource(1234))
	n := k.bits()

	// grid[i][j] is a count of whether flipping
	// input bit i affects output bit j.
	grid := make([][hashSize]int, n)

	for z := 0; z < REP; z++ {
		// pick a random key, hash it
		k.random(r)
		h := k.hash()

		// flip each bit, hash & compare the results
		for i := 0; i < n; i++ {
			k.flipBit(i)
			d := h ^ k.hash()
			k.flipBit(i) // flip back to restore the original key

			// record the effects of that bit flip
			g := &grid[i]
			for j := 0; j < hashSize; j++ {
				g[j] += int(d & 1)
				d >>= 1
			}
		}
	}

	// Each entry in the grid should be about REP/2.
	// More precisely, we did N = k.bits() * hashSize experiments where
	// each is the sum of REP coin flips. We want to find bounds on the
	// sum of coin flips such that a truly random experiment would have
	// all sums inside those bounds with 99% probability.
	N := n * hashSize
	var c float64
	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
	}
	c *= 4.0 // allowed slack - we don't need to be perfectly random
	mean := .5 * REP
	stddev := .5 * math.Sqrt(REP)
	low := int(mean - c*stddev)
	high := int(mean + c*stddev)
	for i := 0; i < n; i++ {
		for j := 0; j < hashSize; j++ {
			x := grid[i][j]
			if x < low || x > high {
				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
			}
		}
	}
}
// All bit rotations of a set of distinct keys
func TestSmhasherWindowed(t *testing.T) {
	t.Logf("32 bit keys")
	windowed(t, &Int32Key{})
	t.Logf("64 bit keys")
	windowed(t, &Int64Key{})
	t.Logf("string keys")
	windowed(t, &BytesKey{make([]byte, 128)})
}
// windowed hashes, for every rotation r, all keys whose set bits lie
// in a BITS-wide window starting at bit r (wrapping around the key),
// and checks that they collide no more than expected.
func windowed(t *testing.T, k Key) {
	if GOARCH == "wasm" {
		t.Skip("Too slow on wasm")
	}
	if testing.Short() {
		t.Skip("Skipping in short mode")
	}
	const BITS = 16

	for r := 0; r < k.bits(); r++ {
		h := newHashSet()
		for i := 0; i < 1<<BITS; i++ {
			k.clear()
			// Copy the bits of i into the window at rotation r.
			for j := 0; j < BITS; j++ {
				if i>>uint(j)&1 != 0 {
					k.flipBit((j + r) % k.bits())
				}
			}
			h.add(k.hash())
		}
		h.check(t)
	}
}
548 // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
549 func TestSmhasherText(t *testing.T) {
550 if testing.Short() {
551 t.Skip("Skipping in short mode")
553 text(t, "Foo", "Bar")
554 text(t, "FooBar", "")
555 text(t, "", "FooBar")
// text hashes every string of the form prefix + m + suffix, where m
// is any N-character string over the alphabet S, and checks the
// collision count.
func text(t *testing.T, prefix, suffix string) {
	const N = 4
	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
	const L = len(S)
	b := make([]byte, len(prefix)+N+len(suffix))
	copy(b, prefix)
	copy(b[len(prefix)+N:], suffix)
	h := newHashSet()
	c := b[len(prefix):] // aliases the variable middle section of b
	for i := 0; i < L; i++ {
		c[0] = S[i]
		for j := 0; j < L; j++ {
			c[1] = S[j]
			for k := 0; k < L; k++ {
				c[2] = S[k]
				for x := 0; x < L; x++ {
					c[3] = S[x]
					h.addB(b)
				}
			}
		}
	}
	h.check(t)
}
582 // Make sure different seed values generate different hashes.
583 func TestSmhasherSeed(t *testing.T) {
584 h := newHashSet()
585 const N = 100000
586 s := "hello"
587 for i := 0; i < N; i++ {
588 h.addS_seed(s, uintptr(i))
590 h.check(t)
593 // size of the hash output (32 or 64 bits)
594 const hashSize = 32 + int(^uintptr(0)>>63<<5)
596 func randBytes(r *rand.Rand, b []byte) {
597 for i := range b {
598 b[i] = byte(r.Uint32())
// benchmarkHash measures StringHash throughput on an n-byte string.
func benchmarkHash(b *testing.B, n int) {
	s := strings.Repeat("A", n)

	for i := 0; i < b.N; i++ {
		StringHash(s, 0)
	}
	b.SetBytes(int64(n))
}

func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
// TestArrayHash checks that hashing of string arrays mixes the seed
// well, by observing map overflow-bucket allocations.
func TestArrayHash(t *testing.T) {
	if Compiler == "gccgo" {
		t.Skip("does not work on gccgo without better escape analysis")
	}

	// Make sure that "" in arrays hash correctly. The hash
	// should at least scramble the input seed so that, e.g.,
	// {"","foo"} and {"foo",""} have different hashes.

	// If the hash is bad, then all (8 choose 4) = 70 keys
	// have the same hash. If so, we allocate 70/8 = 8
	// overflow buckets. If the hash is good we don't
	// normally allocate any overflow buckets, and the
	// probability of even one or two overflows goes down rapidly.
	// (There is always 1 allocation of the bucket array. The map
	// header is allocated on the stack.)
	f := func() {
		// Make the key type at most 128 bytes. Otherwise,
		// we get an allocation per key.
		type key [8]string
		m := make(map[key]bool, 70)

		// fill m with keys that have 4 "foo"s and 4 ""s.
		for i := 0; i < 256; i++ {
			var k key
			cnt := 0
			for j := uint(0); j < 8; j++ {
				if i>>j&1 != 0 {
					k[j] = "foo"
					cnt++
				}
			}
			if cnt == 4 {
				m[k] = true
			}
		}
		if len(m) != 70 {
			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
		}
	}
	if n := testing.AllocsPerRun(10, f); n > 6 {
		t.Errorf("too many allocs %f - hash not balanced", n)
	}
}
// TestStructHash is the struct-key analogue of TestArrayHash: empty
// strings in struct fields must still scramble the seed.
func TestStructHash(t *testing.T) {
	// See the comment in TestArrayHash.
	f := func() {
		type key struct {
			a, b, c, d, e, f, g, h string
		}
		m := make(map[key]bool, 70)

		// fill m with keys that have 4 "foo"s and 4 ""s.
		for i := 0; i < 256; i++ {
			var k key
			cnt := 0
			// Each bit of i selects whether the corresponding field
			// is "foo" or the empty string.
			if i&1 != 0 {
				k.a = "foo"
				cnt++
			}
			if i&2 != 0 {
				k.b = "foo"
				cnt++
			}
			if i&4 != 0 {
				k.c = "foo"
				cnt++
			}
			if i&8 != 0 {
				k.d = "foo"
				cnt++
			}
			if i&16 != 0 {
				k.e = "foo"
				cnt++
			}
			if i&32 != 0 {
				k.f = "foo"
				cnt++
			}
			if i&64 != 0 {
				k.g = "foo"
				cnt++
			}
			if i&128 != 0 {
				k.h = "foo"
				cnt++
			}
			if cnt == 4 {
				m[k] = true
			}
		}
		if len(m) != 70 {
			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
		}
	}
	if n := testing.AllocsPerRun(10, f); n > 6 {
		t.Errorf("too many allocs %f - hash not balanced", n)
	}
}
718 var sink uint64
720 func BenchmarkAlignedLoad(b *testing.B) {
721 var buf [16]byte
722 p := unsafe.Pointer(&buf[0])
723 var s uint64
724 for i := 0; i < b.N; i++ {
725 s += ReadUnaligned64(p)
727 sink = s
730 func BenchmarkUnalignedLoad(b *testing.B) {
731 var buf [16]byte
732 p := unsafe.Pointer(&buf[1])
733 var s uint64
734 for i := 0; i < b.N; i++ {
735 s += ReadUnaligned64(p)
737 sink = s
740 func TestCollisions(t *testing.T) {
741 if testing.Short() {
742 t.Skip("Skipping in short mode")
744 for i := 0; i < 16; i++ {
745 for j := 0; j < 16; j++ {
746 if j == i {
747 continue
749 var a [16]byte
750 m := make(map[uint16]struct{}, 1<<16)
751 for n := 0; n < 1<<16; n++ {
752 a[i] = byte(n)
753 a[j] = byte(n >> 8)
754 m[uint16(BytesHash(a[:], 0))] = struct{}{}
756 if len(m) <= 1<<15 {
757 t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))