// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock, alloclock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

static byte *pool;       // memory allocation pool
static uintptr poolfree; // number of bytes left in the pool
enum {
	Chunk = 32*PageSize, // initial size of the pool
};

// Memory allocation local to this file.
// There is no way to return the allocated memory back to the OS.
static void*
allocate(uintptr size)
{
	void *v;

	if(size == 0)
		return nil;

	// Large requests go straight to the system allocator.
	if(size >= Chunk/2)
		return runtime_SysAlloc(size);

	runtime_lock(&alloclock);
	if(poolfree < size) {
		// Abandon the remainder of the old chunk; map a fresh one.
		pool = runtime_SysAlloc(Chunk);
		if(pool == nil)
			runtime_throw("runtime: cannot allocate memory");
		poolfree = Chunk;
	}
	v = pool;
	pool += size;
	poolfree -= size;
	runtime_unlock(&alloclock);
	return v;
}

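// Illustrative behavior (assuming the usual 4 kB page size, so Chunk is
// 128 kB): allocate(24) followed by allocate(40) advances pool by 64 bytes
// and drops poolfree by 64, while any request of Chunk/2 bytes or more
// bypasses the pool entirely.
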
enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	Location stk[1];	// variable-length call stack
};
enum {
	BuckHashSize = 179999,	// prime
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
{
	int32 i, j;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime_throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i].pc;
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next) {
		if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk) {
			for(j = 0; j < nstk; j++) {
				if(b->stk[j].pc != stk[j].pc ||
				   b->stk[j].lineno != stk[j].lineno ||
				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
					break;
			}
			if (j == nstk)
				return b;
		}
	}

	if(!alloc)
		return nil;

	b = allocate(sizeof *b + nstk*sizeof stk[0]);
	if(b == nil)
		runtime_throw("runtime: cannot allocate memory");
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

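// Note that each bucket is linked twice: next chains it within its
// buckhash slot for lookups, while allnext threads it onto mbuckets or
// bbuckets so the profile readers below can walk every bucket without
// scanning the whole hash table. Buckets are never freed.
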
static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	runtime_lock(&proflock);
	MProf_GC();
	runtime_unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;	// low AddrHashShift bits of the address, complemented
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;	// free list of AddrEntry
static uintptr addrmem;

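// Illustrative split of an address (AddrHashShift=20, AddrDenseBits=8):
//	addr>>20		identifies the 1MB region; hashed below into
//				one of 1<<AddrHashBits top-level chains.
//	(addr>>12) & 0xff	top 8 bits of the low 20; index into ah->dense.
//	addr & 0xfffff		full low 20 bits; stored complemented in the
//				AddrEntry to resolve collisions within a slot.
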
// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};

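// Worked example (illustrative): for addr = 0x400000, addr>>AddrHashShift
// is 4, and 4*HashMultiplier mod 2^32 = 2027808484; its top AddrHashBits
// bits (>>20) give chain index 1933 of the 1<<AddrHashBits = 4096 slots.
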
// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = allocate(sizeof *ah);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		// Refill the free list, 64 entries at a time.
		e = allocate(64*sizeof *e);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			// Unlink the entry, recycle it, and return its bucket.
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	M *m;
	int32 nstk;
	Location stk[32];
	Bucket *b;

	m = runtime_m();
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime_callers(1, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime_unlock(&proflock);
	m = runtime_m();
	m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
	M *m;
	Bucket *b;

	m = runtime_m();
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime_lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime_unlock(&proflock);
	m = runtime_m();
	m->nomemprof--;
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
	// Convert the rate from nanoseconds to CPU ticks.
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
}

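// Illustrative conversion: if runtime_tickspersecond() reports 2.5e9,
// SetBlockProfileRate(10000) (one sample per 10 microseconds blocked)
// stores 10000 * 2.5e9 / 1e9 = 25000 ticks in runtime_blockprofilerate.
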
void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	Location stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
	// Events shorter than the rate are sampled with probability roughly
	// cycles/rate, so on average one event is recorded per rate ticks
	// spent blocked.
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}

// Go interface to profile data. (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uintptr i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i].pc;
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime_lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}

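// The Go-side wrapper typically calls MemProfile twice: once with a nil
// slice just to learn n, then again with a slice of at least that length.
// Most clients use the runtime/pprof package rather than calling this
// directly.
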
void
runtime_MProf_Mark(void (*addroot)(Obj))
{
	// buckhash is not allocated via mallocgc.
	addroot((Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
	addroot((Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
	addroot((Obj){(byte*)&addrhash, sizeof addrhash, 0});
	addroot((Obj){(byte*)&addrfree, sizeof addrfree, 0});
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i].pc;
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;
	int32 i;

	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(mp=first; mp; mp=mp->alllink) {
			for(i = 0; (uintptr)i < nelem(r->stk); i++) {
				r->stk[i] = mp->createstack[i].pc;
			}
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;
	bool enablegc;

	sp = runtime_getcallersp(&b);
	pc = runtime_getcallerpc(&b);

	if(all) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
		enablegc = mstats.enablegc;
		mstats.enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else {
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		USED(pc);
		USED(sp);
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_goroutinetrailer(g);
		if(all)
			runtime_tracebackothers(g);
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		runtime_m()->gcing = 0;
		mstats.enablegc = enablegc;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

static void
saveg(G *gp, TRecord *r)
{
	int32 n, i;
	Location locstk[nelem(r->stk)];

	if(gp == runtime_g()) {
		n = runtime_callers(0, locstk, nelem(r->stk));
		for(i = 0; i < n; i++)
			r->stk[i] = locstk[i].pc;
	}
	else {
		// FIXME: Not implemented.
		n = 0;
	}
	if((size_t)n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	TRecord *r;
	G *gp;

	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

		// Recount with the world stopped, in case goroutines were created
		// between the first count and the stop.
		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			saveg(g, r++);
			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

void
runtime_mprofinit(void)
{
	addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash);
}