1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
6 // Patterned after tcmalloc's algorithms; shorter code.
14 #include "go-string.h"
16 // NOTE(rsc): Everything here could use cas if contention became an issue.
19 // All memory allocations are local and do not escape outside of the profiler.
20 // The profiler is forbidden from referring to garbage-collected memory.
22 enum { MProf, BProf }; // profile types
24 // Per-call-stack profiling information.
25 // Lookup by hashing call stack into a linked-list hash table.
26 typedef struct Bucket Bucket;
29 	Bucket	*next;	// next in hash list
30 	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
32 	// Generally unions can break precise GC,
33 	// this one is fine because it does not contain pointers.
36 		struct  // typ == MProf
// Allocation/free statistics are kept twice: committed totals and
// 'recent' deltas accumulated since the last GC (folded in by MProf_GC).
42 			uintptr	recent_allocs;  // since last gc
44 			uintptr	recent_alloc_bytes;
45 			uintptr	recent_free_bytes;
47 		struct  // typ == BProf
// Number of slots in the bucket hash table.
58 	BuckHashSize = 179999,
60 static Bucket **buckhash;	// bucket hash table; allocated lazily in stkbucket
61 static Bucket *mbuckets;	// memory profile buckets
62 static Bucket *bbuckets;	// blocking profile buckets
63 static uintptr bucketmem;	// bytes of memory consumed by buckets
65 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
// NOTE(review): interior lines of this function are not visible in this
// chunk; the comments below describe only the code shown.
67 stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
// Lazily allocate the hash table on first use; abort if that fails.
74 		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
76 			runtime_throw("runtime: cannot allocate memory");
// Hash the call stack, one frame at a time.
81 	for(i=0; i<nstk; i++) {
// Look for an existing bucket with the same profile type, hash,
// depth, and identical frames (pc, line, and file name all match).
90 	for(b = buckhash[i]; b; b=b->next) {
91 		if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk) {
92 			for(j = 0; j < nstk; j++) {
93 				if(b->stk[j].pc != stk[j].pc ||
94 				   b->stk[j].lineno != stk[j].lineno ||
95 				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
// Not found: allocate a new bucket with the stack copied inline,
// from persistent (non-garbage-collected) memory.
106 	b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
107 	bucketmem += sizeof *b + nstk*sizeof stk[0];
108 	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
// Link the bucket into its hash chain and into the per-type
// list of all buckets (mbuckets for MProf, bbuckets for BProf).
112 	b->next = buckhash[i];
115 		b->allnext = mbuckets;
118 		b->allnext = bbuckets;
// Fold each memory bucket's 'recent' (since-last-GC) counters into its
// permanent totals, then reset the recent counters.  (The enclosing
// function's header is outside this view.)
129 	for(b=mbuckets; b; b=b->allnext) {
130 		b->allocs += b->recent_allocs;
131 		b->frees += b->recent_frees;
132 		b->alloc_bytes += b->recent_alloc_bytes;
133 		b->free_bytes += b->recent_free_bytes;
134 		b->recent_allocs = 0;
136 		b->recent_alloc_bytes = 0;
137 		b->recent_free_bytes = 0;
141 // Record that a gc just happened: all the 'recent' statistics are now real.
143 runtime_MProf_GC(void)
// proflock serializes all profiler state updates.
145 	runtime_lock(&proflock);
147 	runtime_unlock(&proflock);
150 // Map from pointer to Bucket* that allocated it.
152 // Linked-list hash table for top N-AddrHashShift bits.
153 // Array index for next AddrDenseBits bits.
154 // Linked list for next AddrHashShift-AddrDenseBits bits.
155 // This is more efficient than using a general map,
156 // because of the typical clustering of the pointer keys.
158 typedef struct AddrHash AddrHash;
159 typedef struct AddrEntry AddrEntry;
162 	AddrHashBits = 12,	// good for 4GB of used address space
163 	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
164 	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
// Top-level node: one per in-use 1MB (1<<AddrHashShift) address region.
169 	AddrHash *next;	// next in top-level hash table linked list
170 	uintptr addr;	// addr>>20
171 	AddrEntry *dense[1<<AddrDenseBits];
// Bottom-level entry: one recorded (sampled) allocation within a region.
176 	AddrEntry *next;	// next in bottom-level linked list
// Profiler-private state; persistently allocated, never scanned by the GC.
181 static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
182 static AddrEntry *addrfree;	// free list of reusable AddrEntry nodes
183 static uintptr addrmem;	// total bytes allocated for the address map
185 // Multiplicative hash function:
186 // hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
187 // This is a good multiplier as suggested in CLR, Knuth. The hash
188 // value is taken to be the top AddrHashBits bits of the bottom 32 bits
189 // of the multiplied value.
191 HashMultiplier = 2654435769U
194 // Set the bucket associated with addr to b.
196 setaddrbucket(uintptr addr, Bucket *b)
// Multiplicative hash of the 1MB region number (addr>>AddrHashShift);
// the hash value is the top AddrHashBits of the low 32 bits of the product.
203 	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
204 	for(ah=addrhash[h]; ah; ah=ah->next)
205 		if(ah->addr == (addr>>AddrHashShift))
// No AddrHash for this region yet: create one from persistent memory
// and push it onto the front of the hash chain.
208 	ah = runtime_persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys);
209 	addrmem += sizeof *ah;
210 	ah->next = addrhash[h];
211 	ah->addr = addr>>AddrHashShift;
// Take an AddrEntry from the free list, refilling it in batches of 64.
215 	if((e = addrfree) == nil) {
216 		e = runtime_persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys);
217 		addrmem += 64*sizeof *e;
218 		for(i=0; i+1<64; i++)
// Store the low AddrHashShift bits of addr, bitwise complemented
// (getaddrbucket compares against the same complemented form).
223 	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
225 	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
226 	e->next = ah->dense[h];
230 // Get the bucket associated with addr and clear the association.
232 getaddrbucket(uintptr addr)
// Same multiplicative hash as setaddrbucket to locate the region's AddrHash.
239 	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
240 	for(ah=addrhash[h]; ah; ah=ah->next)
241 		if(ah->addr == (addr>>AddrHashShift))
// Walk the dense-slot chain, matching on the complemented low bits
// exactly as stored by setaddrbucket.
246 	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
247 	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
248 		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
259 // Called by malloc to record a profiled block.
261 runtime_MProf_Malloc(void *p, uintptr size)
// Capture the allocation call stack, skipping this frame.
267 	nstk = runtime_callers(1, stk, 32);
268 	runtime_lock(&proflock);
// Credit the allocation to its call-stack bucket, and remember which
// bucket owns p so runtime_MProf_Free can match the free to it later.
269 	b = stkbucket(MProf, stk, nstk, true);
271 	b->recent_alloc_bytes += size;
272 	setaddrbucket((uintptr)p, b);
273 	runtime_unlock(&proflock);
276 // Called when freeing a profiled block.
278 runtime_MProf_Free(void *p, uintptr size)
282 	runtime_lock(&proflock);
// Look up (and clear) the bucket recorded for p at allocation time,
// then credit the free to it.
283 	b = getaddrbucket((uintptr)p);
286 		b->recent_free_bytes += size;
288 	runtime_unlock(&proflock);
291 int64 runtime_blockprofilerate;	// in CPU ticks
293 void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");
296 runtime_SetBlockProfileRate(intgo rate)
301 		r = 0; // disable profiling
303 		// convert ns to cycles, use float64 to prevent overflow during multiplication
304 		r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
// Atomic store so runtime_blockevent can read the rate without proflock.
308 	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
312 runtime_blockevent(int64 cycles, int32 skip)
// Sampling: events of at least 'rate' cycles are always recorded;
// shorter events are recorded with probability proportional to
// cycles/rate, via the random threshold below.
321 	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
322 	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
// Record the blocking call stack in its BProf bucket.
325 	nstk = runtime_callers(skip, stk, 32);
326 	runtime_lock(&proflock);
327 	b = stkbucket(BProf, stk, nstk, true);
330 	runtime_unlock(&proflock);
333 // Go interface to profile data. (Declared in debug.go)
335 // Must match MemProfileRecord in debug.go.
336 typedef struct Record Record;
// Cumulative counts copied from a Bucket; field layout must match
// MemProfileRecord in debug.go.
338 	int64 alloc_bytes, free_bytes;
339 	int64 alloc_objects, free_objects;
343 // Write b's data to r.
345 record(Record *r, Bucket *b)
349 	r->alloc_bytes = b->alloc_bytes;
350 	r->free_bytes = b->free_bytes;
351 	r->alloc_objects = b->allocs;
352 	r->free_objects = b->frees;
// Copy as many stack PCs as fit; the remainder of r->stk is cleared below.
353 	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
354 		r->stk[i] = b->stk[i].pc;
355 	for(; i<nelem(r->stk); i++)
359 func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
364 	runtime_lock(&proflock);
// First pass: count the buckets that belong in the profile.
367 	for(b=mbuckets; b; b=b->allnext) {
368 		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
370 		if(b->allocs != 0 || b->frees != 0)
374 		// Absolutely no data, suggesting that a garbage collection
375 		// has not yet happened. In order to allow profiling when
376 		// garbage collection is disabled from the beginning of execution,
377 		// accumulate stats as if a GC just happened, and recount buckets.
380 		for(b=mbuckets; b; b=b->allnext)
381 			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
// Second pass: if the caller's slice is large enough, copy each
// qualifying bucket into it via record().
387 		r = (Record*)p.__values;
388 		for(b=mbuckets; b; b=b->allnext)
389 			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
392 	runtime_unlock(&proflock);
396 runtime_MProf_Mark(void (*addroot)(Obj))
398 	// buckhash is not allocated via mallocgc.
// Report only the list/table heads as GC roots; the bucket and address
// structures themselves are persistently allocated and (per the Bucket
// union comment above) contain no garbage-collected pointers.
399 	addroot((Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
400 	addroot((Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
401 	addroot((Obj){(byte*)&addrhash, sizeof addrhash, 0});
402 	addroot((Obj){(byte*)&addrfree, sizeof addrfree, 0});
405 // Must match BlockProfileRecord in debug.go.
406 typedef struct BRecord BRecord;
413 func BlockProfile(p Slice) (n int, ok bool) {
418 	runtime_lock(&proflock);
// Count the blocking buckets; fill the slice only if it has room for all.
420 	for(b=bbuckets; b; b=b->allnext)
425 		r = (BRecord*)p.__values;
426 		for(b=bbuckets; b; b=b->allnext, r++) {
428 			r->cycles = b->cycles;
// Copy as many stack PCs as fit; clear the remainder of r->stk.
429 			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
430 				r->stk[i] = b->stk[i].pc;
431 			for(; (uintptr)i<nelem(r->stk); i++)
435 	runtime_unlock(&proflock);
438 // Must match StackRecord in debug.go.
439 typedef struct TRecord TRecord;
444 func ThreadCreateProfile(p Slice) (n int, ok bool) {
// Atomically snapshot the head of the M (OS thread) list and iterate
// over that snapshot.
449 	first = runtime_atomicloadp(&runtime_allm);
451 	for(mp=first; mp; mp=mp->alllink)
456 		r = (TRecord*)p.__values;
// Copy each M's creation-time call stack into the caller's slice.
457 		for(mp=first; mp; mp=mp->alllink) {
458 			for(i = 0; (uintptr)i < nelem(r->stk); i++) {
459 				r->stk[i] = mp->createstack[i].pc;
466 func Stack(b Slice, all bool) (n int) {
470 	sp = runtime_getcallersp(&b);
471 	pc = (byte*)(uintptr)runtime_getcallerpc(&b);
// For an all-goroutine dump: acquire the world semaphore, stop the
// world, and disable GC so the traceback sees a consistent snapshot.
474 		runtime_semacquire(&runtime_worldsema, false);
475 		runtime_m()->gcing = 1;
476 		runtime_stoptheworld();
477 		enablegc = mstats.enablegc;
478 		mstats.enablegc = false;
// Redirect the runtime's print routines into the caller's buffer.
485 	g->writebuf = (byte*)b.__values;
486 	g->writenbuf = b.__count;
489 		runtime_goroutineheader(g);
491 		runtime_printcreatedby(g);
493 			runtime_tracebackothers(g);
// Bytes written = original buffer capacity minus what remains unused.
494 	n = b.__count - g->writenbuf;
// Restore GC state, release the semaphore, and restart the world.
500 		runtime_m()->gcing = 0;
501 		mstats.enablegc = enablegc;
502 		runtime_semrelease(&runtime_worldsema);
503 		runtime_starttheworld();
508 saveg(G *gp, TRecord *r)
511 	Location locstk[nelem(r->stk)];
// Only the currently-running goroutine's stack can be captured here;
// other goroutines fall through to the unimplemented branch below.
513 	if(gp == runtime_g()) {
514 		n = runtime_callers(0, locstk, nelem(r->stk));
515 		for(i = 0; i < n; i++)
516 			r->stk[i] = locstk[i].pc;
519 		// FIXME: Not implemented.
// Presumably terminates the PC array when it is not full — the assigned
// value is outside this view; confirm against the full source.
522 	if((size_t)n < nelem(r->stk))
526 func GoroutineProfile(b Slice) (n int, ok bool) {
531 	n = runtime_gcount();
// Stop the world so goroutine states cannot change mid-snapshot.
533 		runtime_semacquire(&runtime_worldsema, false);
534 		runtime_m()->gcing = 1;
535 		runtime_stoptheworld();
// Re-count with the world stopped; the count may have changed since
// the first read above.
537 		n = runtime_gcount();
541 			r = (TRecord*)b.__values;
// Record every goroutine except the current one and dead ones.
543 			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
544 				if(gp == g || gp->status == Gdead)
550 		runtime_m()->gcing = 0;
551 		runtime_semrelease(&runtime_worldsema);
552 		runtime_starttheworld();
557 runtime_mprofinit(void)
// Allocate the top-level address-hash table from persistent
// (non-garbage-collected) memory at runtime startup.
559 	addrhash = runtime_persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys);