// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Patterned after tcmalloc's algorithms; shorter code.
import (
	"runtime/internal/atomic"
	"unsafe"
)
// NOTE(rsc): Everything here could use cas if contention became an issue.
var proflock mutex
// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.
const (
	// profile types
	memProfile bucketType = 1 + iota
	blockProfile
	mutexProfile

	// size of bucket hash table
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

type bucketType int
// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack words and then the actual record
// data, either a memRecord or a blockRecord.
//
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
type bucket struct {
	next    *bucket
	allnext *bucket
	typ     bucketType // memBucket or blockBucket (includes mutexProfile)
	hash    uintptr
	size    uintptr
	nstk    uintptr
}
// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we
	// counted them naively, the profile would skew toward mallocs.
	//
	// Hence, we delay information to get consistent snapshots as
	// of mark termination. Allocations count toward the next mark
	// termination's snapshot, while sweep frees count toward the
	// previous mark termination's snapshot:
	//
	//               MT          MT          MT          MT
	//              .·|         .·|         .·|         .·|
	//           .·˙  |      .·˙  |      .·˙  |      .·˙  |
	//        .·˙     |   .·˙     |   .·˙     |   .·˙     |
	//     .·˙        |.·˙        |.·˙        |.·˙        |
	//
	//       alloc → ▲ ← free
	//               ┠┅┅┅┅┅┅┅┅┅┅┅ P
	//       C+2     →    C+1    →  C
	//
	//                   alloc → ▲ ← free
	//                           ┠┅┅┅┅┅┅┅┅┅┅┅ P
	//                   C+2     →    C+1    →  C
	//
	// Since we can't publish a consistent snapshot until all of
	// the sweep frees are accounted for, we wait until the next
	// mark termination ("MT" above) to publish the previous mark
	// termination's snapshot ("P" above). To do this, allocation
	// and free events are accounted to *future* heap profile
	// cycles ("C+n" above), and we only publish a cycle once all
	// of the events from that cycle are guaranteed to be done. Specifically:
	// Mallocs are accounted to cycle C+2.
	// Explicit frees are accounted to cycle C+2.
	// GC frees (done during sweeping) are accounted to cycle C+1.
	// After mark termination, we increment the global heap
	// profile cycle counter and accumulate the stats from cycle C
	// into the active profile.
	// active is the currently published profile. A profiling
	// cycle can be accumulated into active once it's complete.
	active memRecordCycle
	// future records the profile events we're counting for cycles
	// that have not yet been published. This is a ring buffer
	// indexed by the global heap profile cycle C and stores
	// cycles C, C+1, and C+2. Unlike active, these counts are
	// only for a single cycle; they are not cumulative across
	// cycles.
	//
	// We store cycle C here because there's a window between when
	// C becomes the active cycle and when we've flushed it to
	// the active profile.
	future [3]memRecordCycle
}
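// For illustration (a sketch, mirroring mProf_Malloc below): an event
// observed while the global cycle counter is c is credited to a fixed
// offset into the future ring, e.g. a malloc lands two cycles ahead:
//
//	mpc := &mp.future[(c+2)%uint32(len(mp.future))]
//	mpc.allocs++
//	mpc.alloc_bytes += size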
type memRecordCycle struct {
	allocs, frees           uintptr
	alloc_bytes, free_bytes uintptr
}
// add accumulates b into a. It does not zero b.
func (a *memRecordCycle) add(b *memRecordCycle) {
	a.allocs += b.allocs
	a.frees += b.frees
	a.alloc_bytes += b.alloc_bytes
	a.free_bytes += b.free_bytes
}
// A blockRecord is the bucket data for a bucket of type blockProfile,
// which is used in blocking and mutex profiles.
type blockRecord struct {
	count  int64
	cycles int64
}
var (
	mbuckets *bucket // memory profile buckets
	bbuckets *bucket // blocking profile buckets
	xbuckets *bucket // mutex profile buckets
	buckhash *[179999]*bucket

	mProf struct {
		// All fields in mProf are protected by proflock.

		// cycle is the global heap profile cycle. This wraps
		// at mProfCycleWrap.
		cycle uint32

		// flushed indicates that future[cycle] in all buckets
		// has been flushed to the active profile.
		flushed bool
	}
)
const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
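// Illustrative arithmetic (not in the original source): the wrap point is
// a multiple of len(memRecord{}.future) == 3, so the ring index cycle%3
// stays consistent when cycle wraps, even though uint32 wraparound alone
// (at a power of two) would not preserve it:
//
//	3 * (2 << 24) = 100663296, and 100663296 % 3 == 0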
// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})
	switch typ {
	default:
		throw("invalid profile bucket type")
	case memProfile:
		size += unsafe.Sizeof(memRecord{})
	case blockProfile, mutexProfile:
		size += unsafe.Sizeof(blockRecord{})
	}

	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
	b.typ = typ
	b.nstk = uintptr(nstk)
	return b
}
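// Memory layout of the allocation above (a sketch; exact sizes are
// platform-dependent):
//
//	+---------------+---------------------+-------------------------+
//	| bucket header | nstk × location     | memRecord / blockRecord |
//	+---------------+---------------------+-------------------------+
//
// stk, mp, and bp below recover the trailing sections by offsetting from
// the bucket pointer.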
// stk returns the slice in b holding the stack.
func (b *bucket) stk() []location {
	stk := (*[maxStack]location)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
	return stk[:b.nstk:b.nstk]
}
// mp returns the memRecord associated with the memProfile bucket b.
func (b *bucket) mp() *memRecord {
	if b.typ != memProfile {
		throw("bad use of bucket.mp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(location{}))
	return (*memRecord)(data)
}
// bp returns the blockRecord associated with the blockProfile bucket b.
func (b *bucket) bp() *blockRecord {
	if b.typ != blockProfile && b.typ != mutexProfile {
		throw("bad use of bucket.bp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(location{}))
	return (*blockRecord)(data)
}
// Return the bucket for stk[0:nstk], allocating a new bucket if needed.
func stkbucket(typ bucketType, size uintptr, stk []location, alloc bool) *bucket {
	if buckhash == nil {
		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
		if buckhash == nil {
			throw("runtime: cannot allocate memory")
		}
	}
	// Hash stack.
	var h uintptr
	for _, loc := range stk {
		h += loc.pc
		h += h << 10
		h ^= h >> 6
	}
	// hash in size
	h += size
	h += h << 10
	h ^= h >> 6
	// finalize
	h += h << 3
	h ^= h >> 11
	i := int(h % buckHashSize)
	for b := buckhash[i]; b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			return b
		}
	}

	if !alloc {
		return nil
	}
	// Create new bucket.
	b := newBucket(typ, len(stk))
	copy(b.stk(), stk)
	b.hash = h
	b.size = size
	b.next = buckhash[i]
	buckhash[i] = b
	if typ == memProfile {
		b.allnext = mbuckets
		mbuckets = b
	} else if typ == mutexProfile {
		b.allnext = xbuckets
		xbuckets = b
	} else {
		b.allnext = bbuckets
		bbuckets = b
	}
	return b
}
func eqslice(x, y []location) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xi := range x {
		if xi != y[i] {
			return false
		}
	}
	return true
}
// mProf_NextCycle publishes the next heap profile cycle and creates a
// fresh heap profile cycle. This operation is fast and can be done
// during STW. The caller must call mProf_Flush before calling
// mProf_NextCycle again.
//
// This is called by mark termination during STW so allocations and
// frees after the world is started again count towards a new heap
// profiling cycle.
func mProf_NextCycle() {
	lock(&proflock)
	// We explicitly wrap mProf.cycle rather than depending on
	// uint wraparound because the memRecord.future ring does not
	// itself wrap at a power of two.
	mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
	mProf.flushed = false
	unlock(&proflock)
}
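// The per-GC sequence, as described in the comments here (a sketch of the
// caller's obligations, not runtime code):
//
//	mProf_NextCycle()  // at mark termination, during STW
//	// ... start the world ...
//	mProf_Flush()      // concurrent; publishes cycle C
//	// ... concurrent sweep ...
//	mProf_PostSweep()  // publishes cycle C+1 once sweep frees are done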
// mProf_Flush flushes the events from the current heap profiling
// cycle into the active profile. After this it is safe to start a new
// heap profiling cycle with mProf_NextCycle.
//
// This is called by GC after mark termination starts the world. In
// contrast with mProf_NextCycle, this is somewhat expensive, but safe
// to do concurrently.
func mProf_Flush() {
	lock(&proflock)
	if !mProf.flushed {
		mProf_FlushLocked()
		mProf.flushed = true
	}
	unlock(&proflock)
}

func mProf_FlushLocked() {
	c := mProf.cycle
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()

		// Flush cycle C into the published profile and clear
		// it for reuse.
		mpc := &mp.future[c%uint32(len(mp.future))]
		mp.active.add(mpc)
		*mpc = memRecordCycle{}
	}
}
// mProf_PostSweep records that all sweep frees for this GC cycle have
// completed. This has the effect of publishing the heap profile
// snapshot as of the last mark termination without advancing the heap
// profile cycle.
func mProf_PostSweep() {
	lock(&proflock)
	// Flush cycle C+1 to the active profile so everything as of
	// the last mark termination becomes visible. *Don't* advance
	// the cycle, since we're still accumulating allocs in cycle
	// C+2, which have to become C+1 in the next mark termination
	// and so on.
	c := mProf.cycle
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		mpc := &mp.future[(c+1)%uint32(len(mp.future))]
		mp.active.add(mpc)
		*mpc = memRecordCycle{}
	}
	unlock(&proflock)
}
// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
	var stk [maxStack]location
	nstk := callers(4, stk[:])
	lock(&proflock)
	b := stkbucket(memProfile, size, stk[:nstk], true)
	c := mProf.cycle
	mp := b.mp()
	mpc := &mp.future[(c+2)%uint32(len(mp.future))]
	mpc.allocs++
	mpc.alloc_bytes += size
	unlock(&proflock)
	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during the call to mProf_Malloc,
	// it's fine to do this non-atomically.
	setprofilebucket(p, b)
}
// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr) {
	lock(&proflock)
	c := mProf.cycle
	mp := b.mp()
	mpc := &mp.future[(c+1)%uint32(len(mp.future))]
	mpc.frees++
	mpc.free_bytes += size
	unlock(&proflock)
}
var blockprofilerate uint64 // in CPU ticks
// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func SetBlockProfileRate(rate int) {
	var r int64
	if rate <= 0 {
		r = 0 // disable profiling
	} else if rate == 1 {
		r = 1 // profile everything
	} else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
		if r == 0 {
			r = 1
		}
	}

	atomic.Store64(&blockprofilerate, uint64(r))
}
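// Usage sketch (illustrative, from a client package): to sample roughly
// one event per millisecond spent blocked, call early in main:
//
//	runtime.SetBlockProfileRate(int(time.Millisecond)) // rate in ns, so 1e6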
func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		cycles = 1
	}
	if blocksampled(cycles) {
		saveblockevent(cycles, skip+1, blockProfile)
	}
}
func blocksampled(cycles int64) bool {
	rate := int64(atomic.Load64(&blockprofilerate))
	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
		return false
	}
	return true
}
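// Illustrative arithmetic (not in the source): with rate = 1e6 ticks, an
// event that blocked for 1e4 ticks passes the filter above only when
// int64(fastrand())%rate <= cycles, i.e. with probability about
// 1e4/1e6 = 1%, so on average one event is recorded per rate ticks of
// total blocked time, matching the SetBlockProfileRate contract.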
func saveblockevent(cycles int64, skip int, which bucketType) {
	gp := getg()
	var nstk int
	var stk [maxStack]location
	if gp.m.curg == nil || gp.m.curg == gp {
		nstk = callers(skip, stk[:])
	} else {
		// FIXME: This should get a traceback of gp.m.curg.
		// nstk = gcallers(gp.m.curg, skip, stk[:])
		nstk = callers(skip, stk[:])
	}
	lock(&proflock)
	b := stkbucket(which, 0, stk[:nstk], true)
	b.bp().count++
	b.bp().cycles += cycles
	unlock(&proflock)
}
var mutexprofilerate uint64 // fraction sampled
// SetMutexProfileFraction controls the fraction of mutex contention events
// that are reported in the mutex profile. On average 1/rate events are
// reported. The previous rate is returned.
//
// To turn off profiling entirely, pass rate 0.
// To just read the current rate, pass rate -1.
// (For n>1 the details of sampling may change.)
func SetMutexProfileFraction(rate int) int {
	if rate < 0 {
		return int(mutexprofilerate)
	}
	old := mutexprofilerate
	atomic.Store64(&mutexprofilerate, uint64(rate))
	return int(old)
}
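// Usage sketch (illustrative, from a client package): sample roughly one
// in five contention events, restoring the previous setting afterwards:
//
//	old := runtime.SetMutexProfileFraction(5)
//	defer runtime.SetMutexProfileFraction(old)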
//go:linkname mutexevent sync.event
func mutexevent(cycles int64, skip int) {
	if cycles < 0 {
		cycles = 0
	}
	rate := int64(atomic.Load64(&mutexprofilerate))
	// TODO(pjw): measure impact of always calling fastrand vs using something
	// like malloc.go:nextSample()
	if rate > 0 && int64(fastrand())%rate == 0 {
		saveblockevent(cycles, skip+1, mutexProfile)
	}
}
// Go interface to profile data.

// A StackRecord describes a single execution stack.
type StackRecord struct {
	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}
// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *StackRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}
// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = 512 * 1024
// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
type MemProfileRecord struct {
	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
	AllocObjects, FreeObjects int64       // number of objects allocated, freed
	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
}
// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }
// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
func (r *MemProfileRecord) InUseObjects() int64 {
	return r.AllocObjects - r.FreeObjects
}
// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *MemProfileRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}
// MemProfile returns a profile of memory allocated and freed per allocation
// site.
//
// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// The returned profile may be up to two garbage collection cycles old.
// This is to avoid skewing the profile toward allocations; because
// allocations happen in real time but frees are delayed until the garbage
// collector performs sweeping, the profile only accounts for allocations
// that have had a chance to be freed by the garbage collector.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
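//
// A typical call pattern (a sketch, not part of this file): size the
// record slice with a first call, then retry with some headroom, since
// records may be added between the two calls:
//
//	n, _ := runtime.MemProfile(nil, true)
//	p := make([]runtime.MemProfileRecord, n+50)
//	if n, ok := runtime.MemProfile(p, true); ok {
//		p = p[:n]
//	}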
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
	lock(&proflock)
	// If we're between mProf_NextCycle and mProf_Flush, take care
	// of flushing to the active profile so we only have to look
	// at the active profile below.
	mProf_FlushLocked()
	clear := true
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
			n++
		}
		if mp.active.allocs != 0 || mp.active.frees != 0 {
			clear = false
		}
	}
	if clear {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate all of the cycles, and recount buckets.
		n = 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			for c := range mp.future {
				mp.active.add(&mp.future[c])
				mp.future[c] = memRecordCycle{}
			}
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				n++
			}
		}
	}
	if n <= len(p) {
		ok = true
		idx := 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				record(&p[idx], b)
				idx++
			}
		}
	}
	unlock(&proflock)
	return
}
// Write b's data to r.
func record(r *MemProfileRecord, b *bucket) {
	mp := b.mp()
	r.AllocBytes = int64(mp.active.alloc_bytes)
	r.FreeBytes = int64(mp.active.free_bytes)
	r.AllocObjects = int64(mp.active.allocs)
	r.FreeObjects = int64(mp.active.frees)
	for i, loc := range b.stk() {
		if i >= len(r.Stack0) {
			break
		}
		r.Stack0[i] = loc.pc
	}
	for i := int(b.nstk); i < len(r.Stack0); i++ {
		r.Stack0[i] = 0
	}
}
func iterate_memprof(fn func(*bucket, uintptr, *location, uintptr, uintptr, uintptr)) {
	lock(&proflock)
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
	}
	unlock(&proflock)
}
// BlockProfileRecord describes blocking events originating
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
	Count  int64
	Cycles int64
	StackRecord
}
// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.blockprofile flag instead
// of calling BlockProfile directly.
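//
// The same two-call sizing pattern shown at MemProfile applies here, and
// likewise to MutexProfile and ThreadCreateProfile (a sketch):
//
//	n, _ := runtime.BlockProfile(nil)
//	p := make([]runtime.BlockProfileRecord, n+50)
//	if n, ok := runtime.BlockProfile(p); ok {
//		p = p[:n]
//	}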
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := bbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := bbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = bp.count
			r.Cycles = bp.cycles
			i := 0
			var loc location
			for i, loc = range b.stk() {
				if i >= len(r.Stack0) {
					break
				}
				r.Stack0[i] = loc.pc
			}
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}
// MutexProfile returns n, the number of records in the current mutex profile.
// If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
// Otherwise, MutexProfile does not change p, and returns n, false.
//
// Most clients should use the runtime/pprof package
// instead of calling MutexProfile directly.
func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := xbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := xbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = int64(bp.count)
			r.Cycles = bp.cycles
			i := 0
			var loc location
			for i, loc = range b.stk() {
				if i >= len(r.Stack0) {
					break
				}
				r.Stack0[i] = loc.pc
			}
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}
// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
	first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
	for mp := first; mp != nil; mp = mp.alllink {
		n++
	}
	if n <= len(p) {
		ok = true
		i := 0
		for mp := first; mp != nil; mp = mp.alllink {
			for j := range mp.createstack {
				p[i].Stack0[j] = mp.createstack[j].pc
			}
			i++
		}
	}
	return
}
// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling GoroutineProfile directly.
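//
// GoroutineProfile stops the world while it runs, so the usual two-call
// sizing pattern should leave headroom for goroutines created between
// the two calls (a sketch):
//
//	n, _ := runtime.GoroutineProfile(nil)
//	p := make([]runtime.StackRecord, n+10)
//	if n, ok := runtime.GoroutineProfile(p); ok {
//		p = p[:n]
//	}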
func GoroutineProfile(p []StackRecord) (n int, ok bool) {
	gp := getg()
	isOK := func(gp1 *g) bool {
		// Checking isSystemGoroutine here makes GoroutineProfile
		// consistent with both NumGoroutine and Stack.
		return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1)
	}
	stopTheWorld("profile")

	n = 1
	for _, gp1 := range allgs {
		if isOK(gp1) {
			n++
		}
	}
	if n <= len(p) {
		ok = true
		r := p

		// Save current goroutine.
		saveg(gp, &r[0])
		r = r[1:]

		// Save other goroutines.
		for _, gp1 := range allgs {
			if isOK(gp1) {
				if len(r) == 0 {
					// Should be impossible, but better to return a
					// truncated profile than to crash the entire process.
					break
				}
				saveg(gp1, &r[0])
				r = r[1:]
			}
		}
	}

	startTheWorld()
	return n, ok
}
func saveg(gp *g, r *StackRecord) {
	if gp == getg() {
		var locbuf [32]location
		n := callers(1, locbuf[:])
		for i := 0; i < n; i++ {
			r.Stack0[i] = locbuf[i].pc
		}
		if n < len(r.Stack0) {
			r.Stack0[n] = 0
		}
	} else {
		// FIXME: Not implemented.
		r.Stack0[0] = 0
	}
}
// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
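//
// A common use (a sketch; the buffer size is an arbitrary choice) is to
// dump all goroutine traces for debugging:
//
//	buf := make([]byte, 1<<20)
//	n := runtime.Stack(buf, true)
//	os.Stderr.Write(buf[:n])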
func Stack(buf []byte, all bool) int {
	if all {
		stopTheWorld("stack trace")
	}
	n := 0
	if len(buf) > 0 {
		gp := getg()
		// Force traceback=1 to override GOTRACEBACK setting,
		// so that Stack's results are consistent.
		// GOTRACEBACK is only about crash dumps.
		gp.writebuf = buf[0:0:len(buf)]
// Tracing of alloc/free/gc.

var tracelock mutex
func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	if typ == nil {
		print("tracealloc(", p, ", ", hex(size), ")\n")
	} else {
		print("tracealloc(", p, ", ", hex(size), ", ", *typ.string, ")\n")
	}
	if gp.m.curg == nil || gp == gp.m.curg {
		goroutineheader(gp)
	} else {
		goroutineheader(gp.m.curg)
		// FIXME: Can't do traceback of other g.
	}
func tracefree(p unsafe.Pointer, size uintptr) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracefree(", p, ", ", hex(size), ")\n")
	// running on m->g0 stack; show all non-g0 goroutines
	tracebackothers(gp)
	print("end tracegc\n")