// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally, unions can break precise GC;
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			// The following complex 3-stage scheme of stats accumulation
			// is required to obtain a consistent picture of mallocs and frees
			// for some point in time.
			// The problem is that mallocs come in real time, while frees
			// come only after a GC during concurrent sweeping. So if we
			// counted them naively, we would get a skew toward mallocs.
			//
			// Mallocs are accounted in recent stats.
			// Explicit frees are accounted in recent stats.
			// GC frees are accounted in prev stats.
			// After GC, prev stats are added to final stats and
			// recent stats are moved into prev stats.
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	prev_allocs;	// from the last-but-one gc until the last gc
			uintptr	prev_frees;
			uintptr	prev_alloc_bytes;
			uintptr	prev_free_bytes;
			uintptr	recent_allocs;	// from the last gc until now
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;	// hash of size + stk
	uintptr	size;
	uintptr	nstk;
	Location stk[1];	// variable-length; nstk entries allocated inline
};
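
// Illustrative timeline (editor's example, not part of the original source):
// suppose a single 64-byte object is allocated during GC cycle N and swept
// after GC N+1.
//   malloc:       recent_allocs=1, recent_alloc_bytes=64
//   GC N:         the alloc rotates into prev_*; recent_* is zeroed
//   sweep frees:  prev_frees=1, prev_free_bytes=64 (the freed=false path
//                 in runtime_MProf_Free below)
//   GC N+1:       prev_* is folded into allocs/frees and *_bytes together,
//                 so a reader of the final stats never observes the free
//                 without its matching alloc.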

enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;	// memory profile buckets
static Bucket *bbuckets;	// blocking profile buckets
static uintptr bucketmem;	// bytes allocated for the buckets themselves

// Return the bucket for stk[0:nstk], allocating a new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr size, Location *stk, int32 nstk, bool alloc)
{
	int32 i, j;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
		if(buckhash == nil)
			runtime_throw("runtime: cannot allocate memory");
	}

	// Hash the stack (shift-and-xor mixing), then fold in size.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i].pc;
		h += h<<10;
		h ^= h>>6;
	}
	h += size;
	h += h<<10;
	h ^= h>>6;
	// finalize
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next) {
		if(b->typ == typ && b->hash == h && b->size == size && b->nstk == (uintptr)nstk) {
			for(j = 0; j < nstk; j++) {
				if(b->stk[j].pc != stk[j].pc ||
				   b->stk[j].lineno != stk[j].lineno ||
				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
					break;
			}
			if(j == nstk)
				return b;
		}
	}

	if(!alloc)
		return nil;

	b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->size = size;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}
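
// Design note (editor's comment): buckets come from runtime_persistentalloc
// and are never freed, so pointers to them (for example the bucket recorded
// for a live object by runtime_MProf_Malloc) stay valid without any help
// from the garbage collector -- consistent with the rule above that the
// profiler must not refer to garbage-collected memory.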

// Add the 'prev' stats to the final stats, rotate 'recent' into 'prev',
// and clear 'recent'. Callers must hold proflock.
static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->prev_allocs;
		b->frees += b->prev_frees;
		b->alloc_bytes += b->prev_alloc_bytes;
		b->free_bytes += b->prev_free_bytes;

		b->prev_allocs = b->recent_allocs;
		b->prev_frees = b->recent_frees;
		b->prev_alloc_bytes = b->recent_alloc_bytes;
		b->prev_free_bytes = b->recent_free_bytes;

		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	runtime_lock(&proflock);
	MProf_GC();
	runtime_unlock(&proflock);
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	Location stk[32];
	Bucket *b;
	int32 nstk;

	nstk = runtime_callers(1, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(MProf, size, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	runtime_unlock(&proflock);

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during the call to MProf_Malloc,
	// it's fine to do this non-atomically.
	runtime_setprofilebucket(p, b);
}
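
// Editor's note: malloc does not call this for every allocation; it samples
// according to the memory profiling rate (runtime.MemProfileRate on the Go
// side), so each bucket counts only the sampled allocations at its stack.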

// Called when freeing a profiled block.
void
runtime_MProf_Free(Bucket *b, uintptr size, bool freed)
{
	runtime_lock(&proflock);
	if(freed) {
		// Explicit free: account in recent stats.
		b->recent_frees++;
		b->recent_free_bytes += size;
	} else {
		// Freed by the garbage collector: account in prev stats.
		b->prev_frees++;
		b->prev_free_bytes += size;
	}
	runtime_unlock(&proflock);
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
}
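
// Illustrative arithmetic (editor's example): on a hypothetical 2.5 GHz
// machine, runtime_tickspersecond() is about 2.5e9, so rate = 10000 ns
// becomes (float64)10000 * 2.5e9 / 1e9 = 25000 ticks. On a machine whose
// tick rate is below 1e9, rate = 1 ns would truncate to 0, which is why r
// is bumped to 1 above: 0 would silently disable profiling instead of
// meaning "profile everything".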

void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	Location stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	// Sample: events of at least 'rate' ticks are always recorded;
	// shorter events are kept with probability roughly cycles/rate.
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(BProf, 0, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}
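
// Illustrative sampling math (editor's example): with rate = 10000 ticks,
// a 2500-tick blocking event is recorded only when fastrand1()%10000 <= 2500,
// i.e. about one time in four, so very frequent short events cannot flood
// the profile, while any single event of 10000+ ticks is always recorded.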

// Go interface to profile data. (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uint32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i].pc;
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;	// zero-fill the unused tail of the stack slot
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime_lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		// Rotating twice pushes even the freshest 'recent' counts
		// all the way into the final stats.
		MProf_GC();
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}
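
// Usage note (editor's comment): when ok is false, n still reports how many
// records exist, so the Go-side caller is expected to retry with a slice of
// at least that length -- the contract documented for runtime.MemProfile.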

void
runtime_MProf_Mark(struct Workbuf **wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
	// buckhash is not allocated via mallocgc.
	enqueue1(wbufp, (Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
	enqueue1(wbufp, (Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
}

// Iterate over all memory profile buckets, invoking callback for each.
void
runtime_iterate_memprof(void (*callback)(Bucket*, uintptr, Location*, uintptr, uintptr, uintptr))
{
	Bucket *b;

	runtime_lock(&proflock);
	for(b=mbuckets; b; b=b->allnext) {
		callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees);
	}
	runtime_unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i].pc;
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}
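
// Editor's note: each BRecord pairs the number of blocking events (count)
// with the total ticks spent blocked (cycles) at one call stack, so a
// consumer can derive the average block duration per stack as cycles/count.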

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;
	int32 i;

	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(mp=first; mp; mp=mp->alllink) {
			for(i = 0; (uintptr)i < nelem(r->stk); i++) {
				r->stk[i] = mp->createstack[i].pc;
			}
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;
	bool enablegc;

	sp = runtime_getcallersp(&b);
	pc = (byte*)(uintptr)runtime_getcallerpc(&b);
	USED(sp);
	USED(pc);

	if(all) {
		runtime_semacquire(&runtime_worldsema, false);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
		enablegc = mstats.enablegc;
		mstats.enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else {
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_printcreatedby(g);
		if(all)
			runtime_tracebackothers(g);
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		runtime_m()->gcing = 0;
		mstats.enablegc = enablegc;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

// Save gp's stack into r. Only the current goroutine can be unwound here.
static void
saveg(G *gp, TRecord *r)
{
	int32 n, i;
	Location locstk[nelem(r->stk)];

	if(gp == runtime_g()) {
		n = runtime_callers(0, locstk, nelem(r->stk), false);
		for(i = 0; i < n; i++)
			r->stk[i] = locstk[i].pc;
	}
	else {
		// FIXME: Not implemented.
		n = 0;
	}
	if((size_t)n < nelem(r->stk))
		r->stk[n] = 0;	// terminate the record
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr i;
	TRecord *r;
	G *gp;

	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_semacquire(&runtime_worldsema, false);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			saveg(g, r++);
			for(i = 0; i < runtime_allglen; i++) {
				gp = runtime_allg[i];
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}
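
// Editor's note: runtime_gcount() is checked again after stopping the world
// because new goroutines can be created between the first count and the
// stop; only the second count, taken while everything is stopped, is
// guaranteed to still be accurate when the records are written.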

// Tracing of alloc/free/gc.

static Lock tracelock;

static const char*
typeinfoname(int32 typeinfo)
{
	if(typeinfo == TypeInfo_SingleObject)
		return "single object";
	else if(typeinfo == TypeInfo_Array)
		return "array";
	else if(typeinfo == TypeInfo_Chan)
		return "channel";
	runtime_throw("typeinfoname: unknown type info");
	return nil;
}

void
runtime_tracealloc(void *p, uintptr size, uintptr typ)
{
	const char *name;
	Type *type;

	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	type = (Type*)(typ & ~3);
	name = typeinfoname(typ & 3);
	if(type == nil)
		runtime_printf("tracealloc(%p, %p, %s)\n", p, size, name);
	else
		runtime_printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->__reflection);
	if(runtime_m()->curg == nil || runtime_g() == runtime_m()->curg) {
		runtime_goroutineheader(runtime_g());
		runtime_traceback();
	} else {
		runtime_goroutineheader(runtime_m()->curg);
		runtime_traceback();
	}
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}
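
// Editor's note on the encoding above: typ packs a Type* together with a
// 2-bit TypeInfo tag in the low bits of one word, so (typ & ~3) recovers
// the pointer and (typ & 3) the tag. This relies on Type values being at
// least 4-byte aligned.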

void
runtime_tracefree(void *p, uintptr size)
{
	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	runtime_printf("tracefree(%p, %p)\n", p, size);
	runtime_goroutineheader(runtime_g());
	runtime_traceback();
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}

void
runtime_tracegc(void)
{
	runtime_lock(&tracelock);
	runtime_m()->traceback = 2;
	runtime_printf("tracegc()\n");
	// running on m->g0 stack; show all non-g0 goroutines
	runtime_tracebackothers(runtime_g());
	runtime_printf("end tracegc\n");
	runtime_printf("\n");
	runtime_m()->traceback = 0;
	runtime_unlock(&tracelock);
}