// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Export temporarily for gccgo's C code to call:
//go:linkname mProf_Malloc runtime.mProf_Malloc
//go:linkname mProf_Free runtime.mProf_Free
//go:linkname mProf_GC runtime.mProf_GC
//go:linkname tracealloc runtime.tracealloc
//go:linkname tracefree runtime.tracefree
//go:linkname tracegc runtime.tracegc
//go:linkname iterate_memprof runtime.iterate_memprof

// NOTE(rsc): Everything here could use cas if contention became an issue.
var proflock mutex

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

const (
	// profile types
	memProfile bucketType = 1 + iota
	blockProfile
	mutexProfile

	// size of bucket hash table
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

type bucketType int

// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack words and then the actual record
// data, either a memRecord or a blockRecord.
//
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
//
// No heap pointers.
//
//go:notinheap
type bucket struct {
	next    *bucket
	allnext *bucket
	typ     bucketType // memBucket or blockBucket (includes mutexProfile)
	hash    uintptr
	size    uintptr
	nstk    uintptr
}

// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we
	// counted them naively, we would get a skew toward mallocs.
	//
	// Mallocs are accounted in recent stats.
	// Explicit frees are accounted in recent stats.
	// GC frees are accounted in prev stats.
	// After GC prev stats are added to final stats and
	// recent stats are moved into prev stats.
	allocs      uintptr
	frees       uintptr
	alloc_bytes uintptr
	free_bytes  uintptr

	// changes between next-to-last GC and last GC
	prev_allocs      uintptr
	prev_frees       uintptr
	prev_alloc_bytes uintptr
	prev_free_bytes  uintptr

	// changes since last GC
	recent_allocs      uintptr
	recent_frees       uintptr
	recent_alloc_bytes uintptr
	recent_free_bytes  uintptr
}

// A blockRecord is the bucket data for a bucket of type blockProfile,
// which is used in blocking and mutex profiles.
type blockRecord struct {
	count  int64
	cycles int64
}

var (
	mbuckets  *bucket // memory profile buckets
	bbuckets  *bucket // blocking profile buckets
	xbuckets  *bucket // mutex profile buckets
	buckhash  *[179999]*bucket
	bucketmem uintptr
)

// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})
	switch typ {
	default:
		throw("invalid profile bucket type")
	case memProfile:
		size += unsafe.Sizeof(memRecord{})
	case blockProfile, mutexProfile:
		size += unsafe.Sizeof(blockRecord{})
	}

	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
	bucketmem += size
	b.typ = typ
	b.nstk = uintptr(nstk)
	return b
}

// stk returns the slice in b holding the stack.
func (b *bucket) stk() []location {
	stk := (*[maxStack]location)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
	return stk[:b.nstk:b.nstk]
}

// mp returns the memRecord associated with the memProfile bucket b.
func (b *bucket) mp() *memRecord {
	if b.typ != memProfile {
		throw("bad use of bucket.mp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(location{}))
	return (*memRecord)(data)
}

// bp returns the blockRecord associated with the blockProfile bucket b.
func (b *bucket) bp() *blockRecord {
	if b.typ != blockProfile && b.typ != mutexProfile {
		throw("bad use of bucket.bp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(location{}))
	return (*blockRecord)(data)
}

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
func stkbucket(typ bucketType, size uintptr, stk []location, alloc bool) *bucket {
	if buckhash == nil {
		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
		if buckhash == nil {
			throw("runtime: cannot allocate memory")
		}
	}

	// Hash stack.
	var h uintptr
	for _, loc := range stk {
		h += loc.pc
		h += h << 10
		h ^= h >> 6
	}
	// hash in size
	h += size
	h += h << 10
	h ^= h >> 6
	// finalize
	h += h << 3
	h ^= h >> 11

	i := int(h % buckHashSize)
	for b := buckhash[i]; b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			return b
		}
	}

	if !alloc {
		return nil
	}

	// Create new bucket.
	b := newBucket(typ, len(stk))
	copy(b.stk(), stk)
	b.hash = h
	b.size = size
	b.next = buckhash[i]
	buckhash[i] = b
	if typ == memProfile {
		b.allnext = mbuckets
		mbuckets = b
	} else if typ == mutexProfile {
		b.allnext = xbuckets
		xbuckets = b
	} else {
		b.allnext = bbuckets
		bbuckets = b
	}
	return b
}

func eqslice(x, y []location) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xi := range x {
		if xi != y[i] {
			return false
		}
	}
	return true
}

func mprof_GC() {
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		mp.allocs += mp.prev_allocs
		mp.frees += mp.prev_frees
		mp.alloc_bytes += mp.prev_alloc_bytes
		mp.free_bytes += mp.prev_free_bytes

		mp.prev_allocs = mp.recent_allocs
		mp.prev_frees = mp.recent_frees
		mp.prev_alloc_bytes = mp.recent_alloc_bytes
		mp.prev_free_bytes = mp.recent_free_bytes

		mp.recent_allocs = 0
		mp.recent_frees = 0
		mp.recent_alloc_bytes = 0
		mp.recent_free_bytes = 0
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
func mProf_GC() {
	lock(&proflock)
	mprof_GC()
	unlock(&proflock)
}

// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
	var stk [maxStack]location
	nstk := callers(4, stk[:])
	lock(&proflock)
	b := stkbucket(memProfile, size, stk[:nstk], true)
	mp := b.mp()
	mp.recent_allocs++
	mp.recent_alloc_bytes += size
	unlock(&proflock)

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during call to mProf_Malloc,
	// it's fine to do this non-atomically.
	systemstack(func() {
		setprofilebucket(p, b)
	})
}

// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr) {
	lock(&proflock)
	mp := b.mp()
	mp.prev_frees++
	mp.prev_free_bytes += size
	unlock(&proflock)
}

var blockprofilerate uint64 // in CPU ticks

// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func SetBlockProfileRate(rate int) {
	var r int64
	if rate <= 0 {
		r = 0 // disable profiling
	} else if rate == 1 {
		r = 1 // profile everything
	} else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
		if r == 0 {
			r = 1
		}
	}

	atomic.Store64(&blockprofilerate, uint64(r))
}
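
// Illustrative usage sketch, not part of this file's API: a normal user
// program (importing os, runtime, and runtime/pprof) would enable block
// profiling early and dump the "block" profile before exiting. The rate and
// file name below are arbitrary choices for the example:
//
//	package main
//
//	import (
//		"os"
//		"runtime"
//		"runtime/pprof"
//	)
//
//	func main() {
//		// Sample, on average, one blocking event per 1000ns spent blocked.
//		runtime.SetBlockProfileRate(1000)
//		defer func() {
//			f, _ := os.Create("block.prof")
//			defer f.Close()
//			pprof.Lookup("block").WriteTo(f, 0)
//		}()
//		// ... rest of the program ...
//	}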

func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		cycles = 1
	}
	if blocksampled(cycles) {
		saveblockevent(cycles, skip+1, blockProfile, &blockprofilerate)
	}
}

func blocksampled(cycles int64) bool {
	rate := int64(atomic.Load64(&blockprofilerate))
	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
		return false
	}
	return true
}

func saveblockevent(cycles int64, skip int, which bucketType, ratep *uint64) {
	gp := getg()
	var nstk int
	var stk [maxStack]location
	if gp.m.curg == nil || gp.m.curg == gp {
		nstk = callers(skip, stk[:])
	} else {
		// FIXME: This should get a traceback of gp.m.curg.
		// nstk = gcallers(gp.m.curg, skip, stk[:])
		nstk = callers(skip, stk[:])
	}
	lock(&proflock)
	b := stkbucket(which, 0, stk[:nstk], true)
	b.bp().count++
	b.bp().cycles += cycles
	unlock(&proflock)
}

var mutexprofilerate uint64 // fraction sampled

// SetMutexProfileFraction controls the fraction of mutex contention events
// that are reported in the mutex profile. On average 1/rate events are
// reported. The previous rate is returned.
//
// To turn off profiling entirely, pass rate 0.
// To just read the current rate, pass rate -1.
// (For n>1 the details of sampling may change.)
func SetMutexProfileFraction(rate int) int {
	if rate < 0 {
		return int(mutexprofilerate)
	}
	old := mutexprofilerate
	atomic.Store64(&mutexprofilerate, uint64(rate))
	return int(old)
}
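
// Illustrative usage sketch, not part of this file: a user program (importing
// os, log, runtime, and runtime/pprof) might enable mutex contention sampling
// and dump the "mutex" profile on shutdown; the fraction and file name are
// arbitrary:
//
//	runtime.SetMutexProfileFraction(5) // report ~1 in 5 contention events
//	defer func() {
//		f, err := os.Create("mutex.prof")
//		if err != nil {
//			log.Fatal(err)
//		}
//		defer f.Close()
//		pprof.Lookup("mutex").WriteTo(f, 0)
//	}()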

//go:linkname mutexevent sync.event
func mutexevent(cycles int64, skip int) {
	if cycles < 0 {
		cycles = 0
	}
	rate := int64(atomic.Load64(&mutexprofilerate))
	// TODO(pjw): measure impact of always calling fastrand vs using something
	// like malloc.go:nextSample()
	if rate > 0 && int64(fastrand())%rate == 0 {
		saveblockevent(cycles, skip+1, mutexProfile, &mutexprofilerate)
	}
}

// Go interface to profile data.

// A StackRecord describes a single execution stack.
type StackRecord struct {
	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *StackRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = 512 * 1024
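
// Illustrative sketch of the advice above, not part of this file: a user
// program that wants every allocated block in its heap profile sets the rate
// exactly once, at the very start of main, before any allocation it cares
// about:
//
//	func main() {
//		runtime.MemProfileRate = 1 // record every allocated block
//		// ... rest of the program ...
//	}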

// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
type MemProfileRecord struct {
	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
	AllocObjects, FreeObjects int64       // number of objects allocated, freed
	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
}

// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }

// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
func (r *MemProfileRecord) InUseObjects() int64 {
	return r.AllocObjects - r.FreeObjects
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *MemProfileRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfile returns a profile of memory allocated and freed per allocation
// site.
//
// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// The returned profile may be up to two garbage collection cycles old.
// This is to avoid skewing the profile toward allocations; because
// allocations happen in real time but frees are delayed until the garbage
// collector performs sweeping, the profile only accounts for allocations
// that have had a chance to be freed by the garbage collector.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
	lock(&proflock)
	clear := true
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		if inuseZero || mp.alloc_bytes != mp.free_bytes {
			n++
		}
		if mp.allocs != 0 || mp.frees != 0 {
			clear = false
		}
	}
	if clear {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		mprof_GC()
		mprof_GC()
		n = 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.alloc_bytes != mp.free_bytes {
				n++
			}
		}
	}
	if n <= len(p) {
		ok = true
		idx := 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.alloc_bytes != mp.free_bytes {
				record(&p[idx], b)
				idx++
			}
		}
	}
	unlock(&proflock)
	return
}
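
// Illustrative sketch of the n/ok contract documented above, not part of this
// file: a caller first asks for the record count, then retries with a
// slightly larger slice (the extra headroom is an arbitrary choice) in case
// new allocation sites appear between the two calls:
//
//	var p []runtime.MemProfileRecord
//	n, ok := runtime.MemProfile(nil, true)
//	for !ok {
//		p = make([]runtime.MemProfileRecord, n+50)
//		n, ok = runtime.MemProfile(p, true)
//	}
//	p = p[:n] // each record describes one allocation site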

// Write b's data to r.
func record(r *MemProfileRecord, b *bucket) {
	mp := b.mp()
	r.AllocBytes = int64(mp.alloc_bytes)
	r.FreeBytes = int64(mp.free_bytes)
	r.AllocObjects = int64(mp.allocs)
	r.FreeObjects = int64(mp.frees)
	for i, loc := range b.stk() {
		if i >= len(r.Stack0) {
			break
		}
		r.Stack0[i] = loc.pc
	}
	for i := int(b.nstk); i < len(r.Stack0); i++ {
		r.Stack0[i] = 0
	}
}

func iterate_memprof(fn func(*bucket, uintptr, *location, uintptr, uintptr, uintptr)) {
	lock(&proflock)
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		fn(b, b.nstk, &b.stk()[0], b.size, mp.allocs, mp.frees)
	}
	unlock(&proflock)
}

// BlockProfileRecord describes blocking events originating
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
	Count  int64
	Cycles int64
	StackRecord
}

// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := bbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := bbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = bp.count
			r.Cycles = bp.cycles
			i := 0
			var loc location
			for i, loc = range b.stk() {
				if i >= len(r.Stack0) {
					break
				}
				r.Stack0[i] = loc.pc
			}
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}
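
// Illustrative sketch, not part of this file: following the same n/ok
// contract, a caller can size the slice with a little arbitrary headroom and
// then, for example, total the recorded contention cycles:
//
//	n, _ := runtime.BlockProfile(nil)
//	records := make([]runtime.BlockProfileRecord, n+10)
//	n, ok := runtime.BlockProfile(records)
//	if ok {
//		var total int64
//		for _, r := range records[:n] {
//			total += r.Cycles
//		}
//		println("blocking records:", n, "total cycles:", total)
//	}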

// MutexProfile returns n, the number of records in the current mutex profile.
// If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
// Otherwise, MutexProfile does not change p, and returns n, false.
//
// Most clients should use the runtime/pprof package
// instead of calling MutexProfile directly.
func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := xbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := xbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = int64(bp.count)
			r.Cycles = bp.cycles
			i := 0
			var loc location
			for i, loc = range b.stk() {
				if i >= len(r.Stack0) {
					break
				}
				r.Stack0[i] = loc.pc
			}
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}

// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
	first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
	for mp := first; mp != nil; mp = mp.alllink {
		n++
	}
	if n <= len(p) {
		ok = true
		i := 0
		for mp := first; mp != nil; mp = mp.alllink {
			for j := range mp.createstack {
				p[i].Stack0[j] = mp.createstack[j].pc
			}
			i++
		}
	}
	return
}

// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) {
	gp := getg()

	isOK := func(gp1 *g) bool {
		// Checking isSystemGoroutine here makes GoroutineProfile
		// consistent with both NumGoroutine and Stack.
		return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1)
	}

	stopTheWorld("profile")

	n = 1
	for _, gp1 := range allgs {
		if isOK(gp1) {
			n++
		}
	}

	if n <= len(p) {
		ok = true
		r := p

		// Save current goroutine.
		saveg(gp, &r[0])
		r = r[1:]

		// Save other goroutines.
		for _, gp1 := range allgs {
			if isOK(gp1) {
				if len(r) == 0 {
					// Should be impossible, but better to return a
					// truncated profile than to crash the entire process.
					break
				}
				saveg(gp1, &r[0])
				r = r[1:]
			}
		}
	}

	startTheWorld()

	return n, ok
}
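
// Illustrative sketch, not part of this file: NumGoroutine gives a reasonable
// initial size, and a little arbitrary slack covers goroutines created before
// the stop-the-world above takes effect:
//
//	p := make([]runtime.StackRecord, runtime.NumGoroutine()+10)
//	n, ok := runtime.GoroutineProfile(p)
//	if ok {
//		for _, r := range p[:n] {
//			_ = r.Stack() // program counters for one goroutine
//		}
//	}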

func saveg(gp *g, r *StackRecord) {
	if gp == getg() {
		var locbuf [32]location
		n := callers(1, locbuf[:])
		for i := 0; i < n; i++ {
			r.Stack0[i] = locbuf[i].pc
		}
		if n < len(r.Stack0) {
			r.Stack0[n] = 0
		}
	} else {
		// FIXME: Not implemented.
		r.Stack0[0] = 0
	}
}

// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
func Stack(buf []byte, all bool) int {
	if all {
		stopTheWorld("stack trace")
	}

	n := 0
	if len(buf) > 0 {
		gp := getg()
		// Force traceback=1 to override GOTRACEBACK setting,
		// so that Stack's results are consistent.
		// GOTRACEBACK is only about crash dumps.
		gp.m.traceback = 1
		gp.writebuf = buf[0:0:len(buf)]
		goroutineheader(gp)
		traceback(1)
		if all {
			tracebackothers(gp)
		}
		gp.m.traceback = 0
		n = len(gp.writebuf)
		gp.writebuf = nil
	}

	if all {
		startTheWorld()
	}
	return n
}
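
// Illustrative sketch, not part of this file (assumes a user program
// importing os and runtime): Stack never grows buf, so a caller picks a size
// up front and accepts possible truncation; the 64 KiB below is an arbitrary
// choice:
//
//	buf := make([]byte, 64<<10)
//	n := runtime.Stack(buf, false) // current goroutine only
//	os.Stderr.Write(buf[:n])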

// Tracing of alloc/free/gc.

var tracelock mutex

func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	if typ == nil {
		print("tracealloc(", p, ", ", hex(size), ")\n")
	} else {
		print("tracealloc(", p, ", ", hex(size), ", ", *typ.string, ")\n")
	}
	if gp.m.curg == nil || gp == gp.m.curg {
		goroutineheader(gp)
		traceback(1)
	} else {
		goroutineheader(gp.m.curg)
		// FIXME: Can't do traceback of other g.
	}
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracefree(p unsafe.Pointer, size uintptr) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracefree(", p, ", ", hex(size), ")\n")
	goroutineheader(gp)
	traceback(1)
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracegc() {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracegc()\n")
	// running on m->g0 stack; show all non-g0 goroutines
	tracebackothers(gp)
	print("end tracegc\n")
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}