// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime

import "unsafe"

// NOTE(rsc): Everything here could use cas if contention became an issue.
var proflock mutex

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

const (
	// profile types
	memProfile bucketType = 1 + iota
	blockProfile

	// size of bucket hash table
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

type bucketType int

// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack words and then the actual record
// data, either a memRecord or a blockRecord.
//
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
type bucket struct {
	next    *bucket
	allnext *bucket
	typ     bucketType // memBucket or blockBucket
	hash    uintptr
	size    uintptr
	nstk    uintptr
}

// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we would
	// naively count them, we would get a skew toward mallocs.
	//
	// Mallocs are accounted in recent stats.
	// Explicit frees are accounted in recent stats.
	// GC frees are accounted in prev stats.
	// After GC prev stats are added to final stats and
	// recent stats are moved into prev stats.
	allocs      uintptr
	frees       uintptr
	alloc_bytes uintptr
	free_bytes  uintptr

	// changes between next-to-last GC and last GC
	prev_allocs      uintptr
	prev_frees       uintptr
	prev_alloc_bytes uintptr
	prev_free_bytes  uintptr

	// changes since last GC
	recent_allocs      uintptr
	recent_frees       uintptr
	recent_alloc_bytes uintptr
	recent_free_bytes  uintptr
}
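
// To make the three-stage scheme above concrete, here is a minimal
// standalone sketch (not runtime code; the names are illustrative only)
// showing how one sampled malloc becomes visible in the final counters
// only after two flushes, mirroring mprof_GC below:
//
//	package main
//
//	import "fmt"
//
//	// toy mirrors of memRecord's allocs / prev_allocs / recent_allocs
//	type stats struct{ final, prev, recent int }
//
//	// flush mimics one GC: prev is published, recent becomes prev.
//	func (s *stats) flush() {
//		s.final += s.prev
//		s.prev = s.recent
//		s.recent = 0
//	}
//
//	func main() {
//		var s stats
//		s.recent++           // a malloc is recorded immediately
//		fmt.Println(s.final) // 0: not yet visible in the profile
//		s.flush()            // first GC: recent -> prev
//		fmt.Println(s.final) // 0: matching frees may still arrive during sweep
//		s.flush()            // second GC: prev -> final
//		fmt.Println(s.final) // 1: now part of a consistent snapshot
//	}
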
// A blockRecord is the bucket data for a bucket of type blockProfile,
// part of the blocking profile.
type blockRecord struct {
	count  int64
	cycles int64
}

var (
	mbuckets *bucket // memory profile buckets
	bbuckets *bucket // blocking profile buckets
	buckhash *[179999]*bucket
)

// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
	switch typ {
	default:
		gothrow("invalid profile bucket type")
	case memProfile:
		size += unsafe.Sizeof(memRecord{})
	case blockProfile:
		size += unsafe.Sizeof(blockRecord{})
	}

	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
	b.typ = typ
	b.nstk = uintptr(nstk)
	return b
}

// stk returns the slice in b holding the stack.
func (b *bucket) stk() []uintptr {
	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
	return stk[:b.nstk:b.nstk]
}

// mp returns the memRecord associated with the memProfile bucket b.
func (b *bucket) mp() *memRecord {
	if b.typ != memProfile {
		gothrow("bad use of bucket.mp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
	return (*memRecord)(data)
}

// bp returns the blockRecord associated with the blockProfile bucket b.
func (b *bucket) bp() *blockRecord {
	if b.typ != blockProfile {
		gothrow("bad use of bucket.bp")
	}
	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
	return (*blockRecord)(data)
}

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
	if buckhash == nil {
		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
		if buckhash == nil {
			gothrow("runtime: cannot allocate memory")
		}
	}

	// Hash stack.
	var h uintptr
	for _, pc := range stk {
		h += pc
		h += h << 10
		h ^= h >> 6
	}
	// hash in size
	h += size
	h += h << 10
	h ^= h >> 6
	// finalize
	h += h << 3
	h ^= h >> 11

	i := int(h % buckHashSize)
	for b := buckhash[i]; b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			return b
		}
	}

	if !alloc {
		return nil
	}

	// Create new bucket.
	b := newBucket(typ, len(stk))
	copy(b.stk(), stk)
	b.hash = h
	b.size = size
	b.next = buckhash[i]
	buckhash[i] = b
	if typ == memProfile {
		b.allnext = mbuckets
		mbuckets = b
	} else {
		b.allnext = bbuckets
		bbuckets = b
	}
	return b
}

func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer

func eqslice(x, y []uintptr) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xi := range x {
		if xi != y[i] {
			return false
		}
	}
	return true
}

func mprof_GC() {
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		mp.allocs += mp.prev_allocs
		mp.frees += mp.prev_frees
		mp.alloc_bytes += mp.prev_alloc_bytes
		mp.free_bytes += mp.prev_free_bytes

		mp.prev_allocs = mp.recent_allocs
		mp.prev_frees = mp.recent_frees
		mp.prev_alloc_bytes = mp.recent_alloc_bytes
		mp.prev_free_bytes = mp.recent_free_bytes

		mp.recent_allocs = 0
		mp.recent_frees = 0
		mp.recent_alloc_bytes = 0
		mp.recent_free_bytes = 0
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
func mProf_GC() {
	lock(&proflock)
	mprof_GC()
	unlock(&proflock)
}

// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
	var stk [maxStack]uintptr
	nstk := callers(4, &stk[0], len(stk))
	lock(&proflock)
	b := stkbucket(memProfile, size, stk[:nstk], true)
	mp := b.mp()
	mp.recent_allocs++
	mp.recent_alloc_bytes += size
	unlock(&proflock)

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during call to mProf_Malloc,
	// it's fine to do this non-atomically.
	setprofilebucket(p, b)
}

func setprofilebucket_m() // mheap.c

func setprofilebucket(p unsafe.Pointer, b *bucket) {
	g := getg()
	g.m.ptrarg[0] = p
	g.m.ptrarg[1] = unsafe.Pointer(b)
	onM(setprofilebucket_m)
}

// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr, freed bool) {
	lock(&proflock)
	mp := b.mp()
	if freed {
		mp.recent_frees++
		mp.recent_free_bytes += size
	} else {
		mp.prev_frees++
		mp.prev_free_bytes += size
	}
	unlock(&proflock)
}

var blockprofilerate uint64 // in CPU ticks

// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func SetBlockProfileRate(rate int) {
	var r int64
	if rate <= 0 {
		r = 0 // disable profiling
	} else if rate == 1 {
		r = 1 // profile everything
	} else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
		if r == 0 {
			r = 1
		}
	}

	atomicstore64(&blockprofilerate, uint64(r))
}
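
// A hedged usage sketch (not part of the runtime): enable block profiling
// early, run the workload, then dump the profile through runtime/pprof.
// The rate of 1000ns per sample below is illustrative only:
//
//	package main
//
//	import (
//		"os"
//		"runtime"
//		"runtime/pprof"
//	)
//
//	func main() {
//		runtime.SetBlockProfileRate(1000) // ~1 sample per 1000ns blocked
//		defer runtime.SetBlockProfileRate(0)
//
//		// ... workload being profiled ...
//
//		pprof.Lookup("block").WriteTo(os.Stdout, 1)
//	}
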
func blockevent(cycles int64, skip int) {
	if cycles <= 0 {
		return
	}
	rate := int64(atomicload64(&blockprofilerate))
	// Sampling: events blocked for at least rate cycles are always recorded;
	// shorter events are kept with probability proportional to cycles/rate.
	if rate <= 0 || (rate > cycles && int64(fastrand1())%rate > cycles) {
		return
	}
	gp := getg()
	var nstk int
	var stk [maxStack]uintptr
	if gp.m.curg == nil || gp.m.curg == gp {
		nstk = callers(skip, &stk[0], len(stk))
	} else {
		nstk = gcallers(gp.m.curg, skip, &stk[0], len(stk))
	}
	lock(&proflock)
	b := stkbucket(blockProfile, 0, stk[:nstk], true)
	b.bp().count++
	b.bp().cycles += cycles
	unlock(&proflock)
}

// Go interface to profile data.

// A StackRecord describes a single execution stack.
type StackRecord struct {
	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *StackRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = 512 * 1024
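
// A hedged usage sketch (not part of the runtime): per the doc comment
// above, change the rate once, at the very start of the program. The value
// of 64 KiB below is illustrative only; 1 samples every allocation and 0
// disables memory profiling.
//
//	package main
//
//	import "runtime"
//
//	func main() {
//		runtime.MemProfileRate = 64 * 1024
//		// ... rest of the program ...
//	}
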
// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
type MemProfileRecord struct {
	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
	AllocObjects, FreeObjects int64       // number of objects allocated, freed
	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
}

// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }

// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
func (r *MemProfileRecord) InUseObjects() int64 {
	return r.AllocObjects - r.FreeObjects
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
func (r *MemProfileRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}

// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
	lock(&proflock)
	clear := true
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		if inuseZero || mp.alloc_bytes != mp.free_bytes {
			n++
		}
		if mp.allocs != 0 || mp.frees != 0 {
			clear = false
		}
	}
	if clear {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		mprof_GC()
		mprof_GC()
		n = 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.alloc_bytes != mp.free_bytes {
				n++
			}
		}
	}
	if n <= len(p) {
		ok = true
		idx := 0
		for b := mbuckets; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.alloc_bytes != mp.free_bytes {
				record(&p[idx], b)
				idx++
			}
		}
	}
	unlock(&proflock)
	return
}
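
// A hedged usage sketch (not part of the runtime) of calling MemProfile
// directly, using the size-then-retry pattern implied by the (n, ok)
// contract and summing live bytes with InUseBytes. The headroom of 50
// records is illustrative only:
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		var p []runtime.MemProfileRecord
//		for {
//			n, ok := runtime.MemProfile(p, false)
//			if ok {
//				p = p[:n]
//				break
//			}
//			p = make([]runtime.MemProfileRecord, n+50) // headroom for new records
//		}
//		var live int64
//		for i := range p {
//			live += p[i].InUseBytes()
//		}
//		fmt.Println("live profiled bytes:", live)
//	}
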
// Write b's data to r.
func record(r *MemProfileRecord, b *bucket) {
	mp := b.mp()
	r.AllocBytes = int64(mp.alloc_bytes)
	r.FreeBytes = int64(mp.free_bytes)
	r.AllocObjects = int64(mp.allocs)
	r.FreeObjects = int64(mp.frees)
	copy(r.Stack0[:], b.stk())
	for i := int(b.nstk); i < len(r.Stack0); i++ {
		r.Stack0[i] = 0
	}
}

func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
	lock(&proflock)
	for b := mbuckets; b != nil; b = b.allnext {
		mp := b.mp()
		fn(b, uintptr(b.nstk), &b.stk()[0], b.size, mp.allocs, mp.frees)
	}
	unlock(&proflock)
}

// BlockProfileRecord describes blocking events originated
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
	Count  int64
	Cycles int64
	StackRecord
}

// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
	lock(&proflock)
	for b := bbuckets; b != nil; b = b.allnext {
		n++
	}
	if n <= len(p) {
		ok = true
		for b := bbuckets; b != nil; b = b.allnext {
			bp := b.bp()
			r := &p[0]
			r.Count = int64(bp.count)
			r.Cycles = int64(bp.cycles)
			i := copy(r.Stack0[:], b.stk())
			for ; i < len(r.Stack0); i++ {
				r.Stack0[i] = 0
			}
			p = p[1:]
		}
	}
	unlock(&proflock)
	return
}
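
// A hedged usage sketch (not part of the runtime) of reading the blocking
// profile directly and reporting the call stack that accumulated the most
// blocked cycles; the headroom of 50 records is illustrative only:
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		runtime.SetBlockProfileRate(1) // record every blocking event
//		// ... workload ...
//
//		n, _ := runtime.BlockProfile(nil) // first call only sizes the profile
//		p := make([]runtime.BlockProfileRecord, n+50)
//		n, ok := runtime.BlockProfile(p)
//		if !ok {
//			return // profile outgrew the headroom; retry in real code
//		}
//		p = p[:n]
//
//		var worst runtime.BlockProfileRecord
//		for _, r := range p {
//			if r.Cycles > worst.Cycles {
//				worst = r
//			}
//		}
//		fmt.Println(worst.Count, "events,", worst.Cycles, "cycles at", worst.Stack())
//	}
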
// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
	first := (*m)(atomicloadp(unsafe.Pointer(&allm)))
	for mp := first; mp != nil; mp = mp.alllink {
		n++
	}
	if n <= len(p) {
		ok = true
		i := 0
		for mp := first; mp != nil; mp = mp.alllink {
			for s := range mp.createstack {
				p[i].Stack0[s] = uintptr(mp.createstack[s])
			}
			i++
		}
	}
	return
}

var allgs []*g // proc.c

// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) {
	n = NumGoroutine()
	if n <= len(p) {
		gp := getg()
		semacquire(&worldsema, false)
		gp.m.gcing = 1
		onM(stoptheworld)

		n = NumGoroutine()
		if n <= len(p) {
			ok = true
			r := p
			sp := getcallersp(unsafe.Pointer(&p))
			pc := getcallerpc(unsafe.Pointer(&p))
			onM(func() {
				saveg(pc, sp, gp, &r[0])
			})
			r = r[1:]
			for _, gp1 := range allgs {
				if gp1 == gp || readgstatus(gp1) == _Gdead {
					continue
				}
				saveg(^uintptr(0), ^uintptr(0), gp1, &r[0])
				r = r[1:]
			}
		}

		gp.m.gcing = 0
		semrelease(&worldsema)
		onM(starttheworld)
	}

	return n, ok
}
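
// A hedged usage sketch (not part of the runtime) of capturing goroutine
// stack records directly; the +10 headroom and the single retry check are
// illustrative only:
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		p := make([]runtime.StackRecord, runtime.NumGoroutine()+10)
//		n, ok := runtime.GoroutineProfile(p)
//		if !ok {
//			fmt.Println("profile grew; retry with a larger slice")
//			return
//		}
//		for _, r := range p[:n] {
//			fmt.Println("goroutine stack (PCs):", r.Stack())
//		}
//	}
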
func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
	n := gentraceback(pc, sp, 0, gp, 0, &r.Stack0[0], len(r.Stack0), nil, nil, 0)
	if n < len(r.Stack0) {
		r.Stack0[n] = 0
	}
}

// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
func Stack(buf []byte, all bool) int {
	if all {
		semacquire(&worldsema, false)
		gp := getg()
		gp.m.gcing = 1
		onM(stoptheworld)
	}

	n := 0
	if len(buf) > 0 {
		gp := getg()
		sp := getcallersp(unsafe.Pointer(&buf))
		pc := getcallerpc(unsafe.Pointer(&buf))
		onM(func() {
			g0 := getg()
			g0.writebuf = buf[0:0:len(buf)]
			goroutineheader(gp)
			traceback(pc, sp, 0, gp)
			if all {
				tracebackothers(gp)
			}
			n = len(g0.writebuf)
			g0.writebuf = nil
		})
	}

	if all {
		gp := getg()
		gp.m.gcing = 0
		semrelease(&worldsema)
		onM(starttheworld)
	}
	return n
}
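
// A hedged usage sketch (not part of the runtime): because Stack truncates
// its output to len(buf), callers typically grow the buffer until the
// result fits; the starting size of 4096 is illustrative only:
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func dumpStacks(all bool) string {
//		buf := make([]byte, 4096)
//		for {
//			n := runtime.Stack(buf, all)
//			if n < len(buf) {
//				return string(buf[:n])
//			}
//			buf = make([]byte, 2*len(buf))
//		}
//	}
//
//	func main() {
//		fmt.Print(dumpStacks(true))
//	}
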
// Tracing of alloc/free/gc.

var tracelock mutex

func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	if typ == nil {
		print("tracealloc(", p, ", ", hex(size), ")\n")
	} else {
		print("tracealloc(", p, ", ", hex(size), ", ", *typ._string, ")\n")
	}
	if gp.m.curg == nil || gp == gp.m.curg {
		goroutineheader(gp)
		pc := getcallerpc(unsafe.Pointer(&p))
		sp := getcallersp(unsafe.Pointer(&p))
		onM(func() {
			traceback(pc, sp, 0, gp)
		})
	} else {
		goroutineheader(gp.m.curg)
		traceback(^uintptr(0), ^uintptr(0), 0, gp.m.curg)
	}
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracefree(p unsafe.Pointer, size uintptr) {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracefree(", p, ", ", hex(size), ")\n")
	goroutineheader(gp)
	pc := getcallerpc(unsafe.Pointer(&p))
	sp := getcallersp(unsafe.Pointer(&p))
	onM(func() {
		traceback(pc, sp, 0, gp)
	})
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}

func tracegc() {
	lock(&tracelock)
	gp := getg()
	gp.m.traceback = 2
	print("tracegc()\n")
	// running on m->g0 stack; show all non-g0 goroutines
	tracebackothers(gp)
	print("end tracegc\n")
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}