libgo/go/testing/benchmark.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package testing
   6
   7 import (
   8         "flag"
   9         "fmt"
  10         "os"
  11         "runtime"
  12         "sync"
  13         "sync/atomic"
  14         "time"
  15 )
  16
  17 var matchBenchmarks = flag.String("test.bench", "", "regular expression to select benchmarks to run")
  18 var benchTime = flag.Duration("test.benchtime", 1*time.Second, "approximate run time for each benchmark")
  19 var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
  20
  21 // Global lock to ensure only one benchmark runs at a time.
  22 var benchmarkLock sync.Mutex
  23
  24 // Used for every benchmark for measuring memory.
  25 var memStats runtime.MemStats
  26
  27 // An internal type but exported because it is cross-package; part of the implementation
  28 // of the "go test" command.
  29 type InternalBenchmark struct {
  30         Name string
  31         F    func(b *B)
  32 }
  33
  34 // B is a type passed to Benchmark functions to manage benchmark
  35 // timing and to specify the number of iterations to run.
  36 type B struct {
  37         common
  38         N                int
  39         previousN        int           // number of iterations in the previous run
  40         previousDuration time.Duration // total duration of the previous run
  41         benchmark        InternalBenchmark
  42         bytes            int64
  43         timerOn          bool
  44         showAllocResult  bool
  45         result           BenchmarkResult
  46         parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines
  47         // The initial states of memStats.Mallocs and memStats.TotalAlloc.
  48         startAllocs uint64
  49         startBytes  uint64
  50         // The net total of this test after being run.
  51         netAllocs uint64
  52         netBytes  uint64
  53 }
  54
  55 // StartTimer starts timing a test.  This function is called automatically
  56 // before a benchmark starts, but it can also used to resume timing after
  57 // a call to StopTimer.
  58 func (b *B) StartTimer() {
  59         if !b.timerOn {
  60                 runtime.ReadMemStats(&memStats)
  61                 b.startAllocs = memStats.Mallocs
  62                 b.startBytes = memStats.TotalAlloc
  63                 b.start = time.Now()
  64                 b.timerOn = true
  65         }
  66 }
  67
  68 // StopTimer stops timing a test.  This can be used to pause the timer
  69 // while performing complex initialization that you don't
  70 // want to measure.
  71 func (b *B) StopTimer() {
  72         if b.timerOn {
  73                 b.duration += time.Now().Sub(b.start)
  74                 runtime.ReadMemStats(&memStats)
  75                 b.netAllocs += memStats.Mallocs - b.startAllocs
  76                 b.netBytes += memStats.TotalAlloc - b.startBytes
  77                 b.timerOn = false
  78         }
  79 }
  80
  81 // ResetTimer zeros the elapsed benchmark time and memory allocation counters.
  82 // It does not affect whether the timer is running.
  83 func (b *B) ResetTimer() {
  84         if b.timerOn {
  85                 runtime.ReadMemStats(&memStats)
  86                 b.startAllocs = memStats.Mallocs
  87                 b.startBytes = memStats.TotalAlloc
  88                 b.start = time.Now()
  89         }
  90         b.duration = 0
  91         b.netAllocs = 0
  92         b.netBytes = 0
  93 }
  94
  95 // SetBytes records the number of bytes processed in a single operation.
  96 // If this is called, the benchmark will report ns/op and MB/s.
  97 func (b *B) SetBytes(n int64) { b.bytes = n }
  98
  99 // ReportAllocs enables malloc statistics for this benchmark.
 100 // It is equivalent to setting -test.benchmem, but it only affects the
 101 // benchmark function that calls ReportAllocs.
 102 func (b *B) ReportAllocs() {
 103         b.showAllocResult = true
 104 }
 105
 106 func (b *B) nsPerOp() int64 {
 107         if b.N <= 0 {
 108                 return 0
 109         }
 110         return b.duration.Nanoseconds() / int64(b.N)
 111 }
 112
 113 // runN runs a single benchmark for the specified number of iterations.
 114 func (b *B) runN(n int) {
 115         benchmarkLock.Lock()
 116         defer benchmarkLock.Unlock()
 117         // Try to get a comparable environment for each run
 118         // by clearing garbage from previous runs.
 119         runtime.GC()
 120         b.N = n
 121         b.parallelism = 1
 122         b.ResetTimer()
 123         b.StartTimer()
 124         b.benchmark.F(b)
 125         b.StopTimer()
 126         b.previousN = n
 127         b.previousDuration = b.duration
 128 }
 129
 130 func min(x, y int) int {
 131         if x > y {
 132                 return y
 133         }
 134         return x
 135 }
 136
 137 func max(x, y int) int {
 138         if x < y {
 139                 return y
 140         }
 141         return x
 142 }
 143
 144 // roundDown10 rounds a number down to the nearest power of 10.
 145 func roundDown10(n int) int {
 146         var tens = 0
 147         // tens = floor(log_10(n))
 148         for n >= 10 {
 149                 n = n / 10
 150                 tens++
 151         }
 152         // result = 10^tens
 153         result := 1
 154         for i := 0; i < tens; i++ {
 155                 result *= 10
 156         }
 157         return result
 158 }
 159
 160 // roundUp rounds x up to a number of the form [1eX, 2eX, 5eX].
 161 func roundUp(n int) int {
 162         base := roundDown10(n)
 163         switch {
 164         case n <= base:
 165                 return base
 166         case n <= (2 * base):
 167                 return 2 * base
 168         case n <= (5 * base):
 169                 return 5 * base
 170         default:
 171                 return 10 * base
 172         }
 173 }
 174
 175 // run times the benchmark function in a separate goroutine.
 176 func (b *B) run() BenchmarkResult {
 177         go b.launch()
 178         <-b.signal
 179         return b.result
 180 }
 181
 182 // launch launches the benchmark function.  It gradually increases the number
 183 // of benchmark iterations until the benchmark runs for a second in order
 184 // to get a reasonable measurement.  It prints timing information in this form
 185 //              testing.BenchmarkHello  100000          19 ns/op
 186 // launch is run by the fun function as a separate goroutine.
 187 func (b *B) launch() {
 188         // Run the benchmark for a single iteration in case it's expensive.
 189         n := 1
 190
 191         // Signal that we're done whether we return normally
 192         // or by FailNow's runtime.Goexit.
 193         defer func() {
 194                 b.signal <- b
 195         }()
 196
 197         b.runN(n)
 198         // Run the benchmark for at least the specified amount of time.
 199         d := *benchTime
 200         for !b.failed && b.duration < d && n < 1e9 {
 201                 last := n
 202                 // Predict iterations/sec.
 203                 if b.nsPerOp() == 0 {
 204                         n = 1e9
 205                 } else {
 206                         n = int(d.Nanoseconds() / b.nsPerOp())
 207                 }
 208                 // Run more iterations than we think we'll need for a second (1.5x).
 209                 // Don't grow too fast in case we had timing errors previously.
 210                 // Be sure to run at least one more than last time.
 211                 n = max(min(n+n/2, 100*last), last+1)
 212                 // Round up to something easy to read.
 213                 n = roundUp(n)
 214                 b.runN(n)
 215         }
 216         b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes}
 217 }
 218
 219 // The results of a benchmark run.
 220 type BenchmarkResult struct {
 221         N         int           // The number of iterations.
 222         T         time.Duration // The total time taken.
 223         Bytes     int64         // Bytes processed in one iteration.
 224         MemAllocs uint64        // The total number of memory allocations.
 225         MemBytes  uint64        // The total number of bytes allocated.
 226 }
 227
 228 func (r BenchmarkResult) NsPerOp() int64 {
 229         if r.N <= 0 {
 230                 return 0
 231         }
 232         return r.T.Nanoseconds() / int64(r.N)
 233 }
 234
 235 func (r BenchmarkResult) mbPerSec() float64 {
 236         if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
 237                 return 0
 238         }
 239         return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
 240 }
 241
 242 func (r BenchmarkResult) AllocsPerOp() int64 {
 243         if r.N <= 0 {
 244                 return 0
 245         }
 246         return int64(r.MemAllocs) / int64(r.N)
 247 }
 248
 249 func (r BenchmarkResult) AllocedBytesPerOp() int64 {
 250         if r.N <= 0 {
 251                 return 0
 252         }
 253         return int64(r.MemBytes) / int64(r.N)
 254 }
 255
 256 func (r BenchmarkResult) String() string {
 257         mbs := r.mbPerSec()
 258         mb := ""
 259         if mbs != 0 {
 260                 mb = fmt.Sprintf("\t%7.2f MB/s", mbs)
 261         }
 262         nsop := r.NsPerOp()
 263         ns := fmt.Sprintf("%10d ns/op", nsop)
 264         if r.N > 0 && nsop < 100 {
 265                 // The format specifiers here make sure that
 266                 // the ones digits line up for all three possible formats.
 267                 if nsop < 10 {
 268                         ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
 269                 } else {
 270                         ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
 271                 }
 272         }
 273         return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb)
 274 }
 275
 276 func (r BenchmarkResult) MemString() string {
 277         return fmt.Sprintf("%8d B/op\t%8d allocs/op",
 278                 r.AllocedBytesPerOp(), r.AllocsPerOp())
 279 }
 280
 281 // An internal function but exported because it is cross-package; part of the implementation
 282 // of the "go test" command.
 283 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
 284         // If no flag was specified, don't run benchmarks.
 285         if len(*matchBenchmarks) == 0 {
 286                 return
 287         }
 288         for _, Benchmark := range benchmarks {
 289                 matched, err := matchString(*matchBenchmarks, Benchmark.Name)
 290                 if err != nil {
 291                         fmt.Fprintf(os.Stderr, "testing: invalid regexp for -test.bench: %s\n", err)
 292                         os.Exit(1)
 293                 }
 294                 if !matched {
 295                         continue
 296                 }
 297                 for _, procs := range cpuList {
 298                         runtime.GOMAXPROCS(procs)
 299                         b := &B{
 300                                 common: common{
 301                                         signal: make(chan interface{}),
 302                                 },
 303                                 benchmark: Benchmark,
 304                         }
 305                         benchName := Benchmark.Name
 306                         if procs != 1 {
 307                                 benchName = fmt.Sprintf("%s-%d", Benchmark.Name, procs)
 308                         }
 309                         fmt.Printf("%s\t", benchName)
 310                         r := b.run()
 311                         if b.failed {
 312                                 // The output could be very long here, but probably isn't.
 313                                 // We print it all, regardless, because we don't want to trim the reason
 314                                 // the benchmark failed.
 315                                 fmt.Printf("--- FAIL: %s\n%s", benchName, b.output)
 316                                 continue
 317                         }
 318                         results := r.String()
 319                         if *benchmarkMemory || b.showAllocResult {
 320                                 results += "\t" + r.MemString()
 321                         }
 322                         fmt.Println(results)
 323                         // Unlike with tests, we ignore the -chatty flag and always print output for
 324                         // benchmarks since the output generation time will skew the results.
 325                         if len(b.output) > 0 {
 326                                 b.trimOutput()
 327                                 fmt.Printf("--- BENCH: %s\n%s", benchName, b.output)
 328                         }
 329                         if p := runtime.GOMAXPROCS(-1); p != procs {
 330                                 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
 331                         }
 332                 }
 333         }
 334 }
 335
 336 // trimOutput shortens the output from a benchmark, which can be very long.
 337 func (b *B) trimOutput() {
 338         // The output is likely to appear multiple times because the benchmark
 339         // is run multiple times, but at least it will be seen. This is not a big deal
 340         // because benchmarks rarely print, but just in case, we trim it if it's too long.
 341         const maxNewlines = 10
 342         for nlCount, j := 0, 0; j < len(b.output); j++ {
 343                 if b.output[j] == '\n' {
 344                         nlCount++
 345                         if nlCount >= maxNewlines {
 346                                 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
 347                                 break
 348                         }
 349                 }
 350         }
 351 }
 352
 353 // A PB is used by RunParallel for running parallel benchmarks.
 354 type PB struct {
 355         globalN *uint64 // shared between all worker goroutines iteration counter
 356         grain   uint64  // acquire that many iterations from globalN at once
 357         cache   uint64  // local cache of acquired iterations
 358         bN      uint64  // total number of iterations to execute (b.N)
 359 }
 360
 361 // Next reports whether there are more iterations to execute.
 362 func (pb *PB) Next() bool {
 363         if pb.cache == 0 {
 364                 n := atomic.AddUint64(pb.globalN, pb.grain)
 365                 if n <= pb.bN {
 366                         pb.cache = pb.grain
 367                 } else if n < pb.bN+pb.grain {
 368                         pb.cache = pb.bN + pb.grain - n
 369                 } else {
 370                         return false
 371                 }
 372         }
 373         pb.cache--
 374         return true
 375 }
 376
 377 // RunParallel runs a benchmark in parallel.
 378 // It creates multiple goroutines and distributes b.N iterations among them.
 379 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
 380 // non-CPU-bound benchmarks, call SetParallelism before RunParallel.
 381 // RunParallel is usually used with the go test -cpu flag.
 382 //
 383 // The body function will be run in each goroutine. It should set up any
 384 // goroutine-local state and then iterate until pb.Next returns false.
 385 // It should not use the StartTimer, StopTimer, or ResetTimer functions,
 386 // because they have global effect.
 387 func (b *B) RunParallel(body func(*PB)) {
 388         // Calculate grain size as number of iterations that take ~100µs.
 389         // 100µs is enough to amortize the overhead and provide sufficient
 390         // dynamic load balancing.
 391         grain := uint64(0)
 392         if b.previousN > 0 && b.previousDuration > 0 {
 393                 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
 394         }
 395         if grain < 1 {
 396                 grain = 1
 397         }
 398         // We expect the inner loop and function call to take at least 10ns,
 399         // so do not do more than 100µs/10ns=1e4 iterations.
 400         if grain > 1e4 {
 401                 grain = 1e4
 402         }
 403
 404         n := uint64(0)
 405         numProcs := b.parallelism * runtime.GOMAXPROCS(0)
 406         var wg sync.WaitGroup
 407         wg.Add(numProcs)
 408         for p := 0; p < numProcs; p++ {
 409                 go func() {
 410                         defer wg.Done()
 411                         pb := &PB{
 412                                 globalN: &n,
 413                                 grain:   grain,
 414                                 bN:      uint64(b.N),
 415                         }
 416                         body(pb)
 417                 }()
 418         }
 419         wg.Wait()
 420         if n <= uint64(b.N) && !b.Failed() {
 421                 b.Fatal("RunParallel: body exited without pb.Next() == false")
 422         }
 423 }
 424
 425 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
 426 // There is usually no need to call SetParallelism for CPU-bound benchmarks.
 427 // If p is less than 1, this call will have no effect.
 428 func (b *B) SetParallelism(p int) {
 429         if p >= 1 {
 430                 b.parallelism = p
 431         }
 432 }
 433
 434 // Benchmark benchmarks a single function. Useful for creating
 435 // custom benchmarks that do not use the "go test" command.
 436 func Benchmark(f func(b *B)) BenchmarkResult {
 437         b := &B{
 438                 common: common{
 439                         signal: make(chan interface{}),
 440                 },
 441                 benchmark: InternalBenchmark{"", f},
 442         }
 443         return b.run()
 444 }