// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

enum { MProf, BProf };  // profile types
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	union
	{
		struct	// typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;	// since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct	// typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;	// memory profile buckets
static Bucket *bbuckets;	// blocking profile buckets
static uintptr bucketmem;	// allocated bytes used by buckets
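
// Note: stk is declared with a single element, but stkbucket below
// over-allocates each Bucket (sizeof *b + nstk*sizeof stk[0]), so stk
// acts as a variable-length trailing array with nstk valid entries.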

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk &&
		   runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}
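
// Typical use (a sketch; it mirrors runtime_MProf_Malloc and
// runtime_blockevent below, and the identifiers are illustrative):
//
//	uintptr stk[32];
//	int32 nstk;
//	Bucket *b;
//
//	nstk = runtime_callers(1, stk, 32);
//	runtime_lock(&proflock);
//	b = stkbucket(MProf, stk, nstk, true);
//	// ... update b's counters while holding proflock ...
//	runtime_unlock(&proflock);
//
// With alloc=false, stkbucket is a pure lookup and returns nil when no
// bucket exists for the stack.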

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	Bucket *b;

	runtime_lock(&proflock);
	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
	runtime_unlock(&proflock);
}
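
// In effect, the recent_* counters delay publication: an allocation made
// between collections is neither long-lived nor garbage yet, so folding
// it into the public allocs/frees totals only at the next GC keeps
// MemProfile's in-use numbers (alloc_bytes - free_bytes) from flickering
// as short-lived objects come and go.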

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash *addrhash[1<<AddrHashBits];
static AddrEntry *addrfree;
static uintptr addrmem;
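
// Worked example of the three-level split for a sample pointer,
// say p = 0x2a4b3c8 (value chosen for illustration only):
//
//	p>>AddrHashShift = 0x2a	the 1MB region; hashed into addrhash,
//				then matched against ah->addr
//	(p>>12) & 0xff   = 0x4b	top 8 bits of the low 20; index into
//				ah->dense
//	p & 0xfffff      = 0x4b3c8	low 20 bits; e->addr stores their
//				complement, ~0x4b3c8, which picks the
//				entry out of the per-slot chain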

// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
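
// That is, HashMultiplier = floor(((sqrt(5)-1)/2) * 2^32) = 2654435769
// (0x9e3779b9), the golden-ratio constant of Fibonacci hashing.
// Region numbers addr>>AddrHashShift tend to be clustered; the multiply
// spreads them roughly uniformly over the 1<<AddrHashBits chains via
//
//	h = (uint32)(key*HashMultiplier) >> (32-AddrHashBits);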

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}
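
// AddrEntries are carved 64 at a time from a single allocation and
// recycled through the addrfree list (getaddrbucket below returns an
// entry to the list when an association is cleared), so the per-block
// bookkeeping does not cost one mallocgc call per profiled allocation.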

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	M *m;
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	m = runtime_m();
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime_callers(1, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime_unlock(&proflock);
	m = runtime_m();
	m->nomemprof--;
}
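
// The profiler's own allocations (buckets, AddrHash nodes, AddrEntry
// batches) are made with FlagNoProfiling, so mallocgc skips the profile
// hook for them and recording an allocation cannot re-enter the profiler.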

// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
	M *m;
	Bucket *b;

	m = runtime_m();
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime_lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime_unlock(&proflock);
	m = runtime_m();
	m->nomemprof--;
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) asm("runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
}
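
// The exported rate is given in nanoseconds; it is stored in CPU ticks
// so blockevent can compare it directly against cycle counts.  For
// example (illustrative numbers only), on a 2.5GHz machine,
// SetBlockProfileRate(1000) stores 1000 * 2.5e9 / 1e9 = 2500 ticks.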

void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}
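
// Sampling rule: an event that blocked for at least rate ticks is always
// recorded; a shorter one is recorded with probability roughly
// cycles/rate, since runtime_fastrand1()%rate is uniform over [0, rate).
// Every recorded event adds its full duration to b->cycles.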

// Go interface to profile data.  (Declared in extern.go)
// Assumes Go sizeof(int) == sizeof(int32)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uintptr i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;

	runtime_lock(&proflock);
	n = 0;
	for(b=mbuckets; b; b=b->allnext)
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}
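
// Callers follow the usual two-pass protocol: call MemProfile with a
// slice that may be too short; if ok is false, n reports how many
// records are needed, so the caller can grow the slice and retry.
// BlockProfile, ThreadCreateProfile, and GoroutineProfile below follow
// the same shape.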

void
runtime_MProf_Mark(void (*addroot)(byte *, uintptr))
{
	// buckhash is not allocated via mallocgc.
	addroot((byte*)&mbuckets, sizeof mbuckets);
	addroot((byte*)&bbuckets, sizeof bbuckets);
	addroot((byte*)&addrhash, sizeof addrhash);
	addroot((byte*)&addrfree, sizeof addrfree);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *m;

	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(m=first; m; m=m->alllink)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(m=first; m; m=m->alllink) {
			runtime_memmove(r->stk, m->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;
	bool enablegc;

	sp = runtime_getcallersp(&b);
	pc = runtime_getcallerpc(&b);

	if(all) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
		enablegc = mstats.enablegc;
		mstats.enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else {
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		USED(pc);
		USED(sp);
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_goroutinetrailer(g);
		if(all)
			runtime_tracebackothers(g);
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		runtime_m()->gcing = 0;
		mstats.enablegc = enablegc;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

static void
saveg(G *g, TRecord *r)
{
	int32 n;

	if(g == runtime_g())
		n = runtime_callers(0, r->stk, nelem(r->stk));
	else {
		// FIXME: Not implemented.
		n = 0;
	}
	if((size_t)n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	TRecord *r;
	G *gp;

	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			saveg(g, r++);
			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}
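
// GoroutineProfile reads runtime_gcount twice: once optimistically, to
// avoid stopping the world when the buffer is clearly too small, and
// again after stoptheworld, because goroutines may have been created
// between the first count and the world actually stopping.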