* config/rs6000/rs6000.c (rs6000_deligitimze_address): Do not
[official-gcc.git] / libgo / runtime / mprof.goc
blob8b3a195b8906af7da50128fc98a746fcc3a9c551
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Malloc profiling.
6 // Patterned after tcmalloc's algorithms; shorter code.
8 package runtime
9 #include "runtime.h"
10 #include "arch.h"
11 #include "malloc.h"
12 #include "defs.h"
13 #include "go-type.h"
15 // NOTE(rsc): Everything here could use cas if contention became an issue.
16 static Lock proflock;
18 enum { MProf, BProf };  // profile types
20 // Per-call-stack profiling information.
21 // Lookup by hashing call stack into a linked-list hash table.
22 typedef struct Bucket Bucket;
23 struct Bucket
25         Bucket  *next;  // next in hash list
26         Bucket  *allnext;       // next in list of all mbuckets/bbuckets
27         int32   typ;
28         union
29         {
30                 struct  // typ == MProf
31                 {
32                         uintptr allocs;
33                         uintptr frees;
34                         uintptr alloc_bytes;
35                         uintptr free_bytes;
36                         uintptr recent_allocs;  // since last gc
37                         uintptr recent_frees;
38                         uintptr recent_alloc_bytes;
39                         uintptr recent_free_bytes;
40                 };
41                 struct  // typ == BProf
42                 {
43                         int64   count;
44                         int64   cycles;
45                 };
46         };
47         uintptr hash;
48         uintptr nstk;
49         uintptr stk[1];
51 enum {
52         BuckHashSize = 179999,
54 static Bucket **buckhash;
55 static Bucket *mbuckets;  // memory profile buckets
56 static Bucket *bbuckets;  // blocking profile buckets
57 static uintptr bucketmem;
59 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
60 static Bucket*
61 stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
63         int32 i;
64         uintptr h;
65         Bucket *b;
67         if(buckhash == nil) {
68                 buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
69                 mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
70         }
72         // Hash stack.
73         h = 0;
74         for(i=0; i<nstk; i++) {
75                 h += stk[i];
76                 h += h<<10;
77                 h ^= h>>6;
78         }
79         h += h<<3;
80         h ^= h>>11;
82         i = h%BuckHashSize;
83         for(b = buckhash[i]; b; b=b->next)
84                 if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk &&
85                    runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
86                         return b;
88         if(!alloc)
89                 return nil;
91         b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
92         bucketmem += sizeof *b + nstk*sizeof stk[0];
93         runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
94         b->typ = typ;
95         b->hash = h;
96         b->nstk = nstk;
97         b->next = buckhash[i];
98         buckhash[i] = b;
99         if(typ == MProf) {
100                 b->allnext = mbuckets;
101                 mbuckets = b;
102         } else {
103                 b->allnext = bbuckets;
104                 bbuckets = b;
105         }
106         return b;
109 // Record that a gc just happened: all the 'recent' statistics are now real.
110 void
111 runtime_MProf_GC(void)
113         Bucket *b;
114         
115         runtime_lock(&proflock);
116         for(b=mbuckets; b; b=b->allnext) {
117                 b->allocs += b->recent_allocs;
118                 b->frees += b->recent_frees;
119                 b->alloc_bytes += b->recent_alloc_bytes;
120                 b->free_bytes += b->recent_free_bytes;
121                 b->recent_allocs = 0;
122                 b->recent_frees = 0;
123                 b->recent_alloc_bytes = 0;
124                 b->recent_free_bytes = 0;
125         }
126         runtime_unlock(&proflock);
129 // Map from pointer to Bucket* that allocated it.
130 // Three levels:
131 //      Linked-list hash table for top N-AddrHashShift bits.
132 //      Array index for next AddrDenseBits bits.
133 //      Linked list for next AddrHashShift-AddrDenseBits bits.
134 // This is more efficient than using a general map,
135 // because of the typical clustering of the pointer keys.
137 typedef struct AddrHash AddrHash;
138 typedef struct AddrEntry AddrEntry;
140 enum {
141         AddrHashBits = 12,      // good for 4GB of used address space
142         AddrHashShift = 20,     // each AddrHash knows about 1MB of address space
143         AddrDenseBits = 8,      // good for a profiling rate of 4096 bytes
146 struct AddrHash
148         AddrHash *next; // next in top-level hash table linked list
149         uintptr addr;   // addr>>20
150         AddrEntry *dense[1<<AddrDenseBits];
153 struct AddrEntry
155         AddrEntry *next;        // next in bottom-level linked list
156         uint32 addr;
157         Bucket *b;
160 static AddrHash *addrhash[1<<AddrHashBits];
161 static AddrEntry *addrfree;
162 static uintptr addrmem;
164 // Multiplicative hash function:
165 // hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
166 // This is a good multiplier as suggested in CLR, Knuth.  The hash
167 // value is taken to be the top AddrHashBits bits of the bottom 32 bits
168 // of the multiplied value.
169 enum {
170         HashMultiplier = 2654435769U
173 // Set the bucket associated with addr to b.
174 static void
175 setaddrbucket(uintptr addr, Bucket *b)
177         int32 i;
178         uint32 h;
179         AddrHash *ah;
180         AddrEntry *e;
182         h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
183         for(ah=addrhash[h]; ah; ah=ah->next)
184                 if(ah->addr == (addr>>AddrHashShift))
185                         goto found;
187         ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
188         addrmem += sizeof *ah;
189         ah->next = addrhash[h];
190         ah->addr = addr>>AddrHashShift;
191         addrhash[h] = ah;
193 found:
194         if((e = addrfree) == nil) {
195                 e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
196                 addrmem += 64*sizeof *e;
197                 for(i=0; i+1<64; i++)
198                         e[i].next = &e[i+1];
199                 e[63].next = nil;
200         }
201         addrfree = e->next;
202         e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
203         e->b = b;
204         h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
205         e->next = ah->dense[h];
206         ah->dense[h] = e;
209 // Get the bucket associated with addr and clear the association.
210 static Bucket*
211 getaddrbucket(uintptr addr)
213         uint32 h;
214         AddrHash *ah;
215         AddrEntry *e, **l;
216         Bucket *b;
218         h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
219         for(ah=addrhash[h]; ah; ah=ah->next)
220                 if(ah->addr == (addr>>AddrHashShift))
221                         goto found;
222         return nil;
224 found:
225         h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
226         for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
227                 if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
228                         *l = e->next;
229                         b = e->b;
230                         e->next = addrfree;
231                         addrfree = e;
232                         return b;
233                 }
234         }
235         return nil;
238 // Called by malloc to record a profiled block.
239 void
240 runtime_MProf_Malloc(void *p, uintptr size)
242         M *m;
243         int32 nstk;
244         uintptr stk[32];
245         Bucket *b;
247         m = runtime_m();
248         if(m->nomemprof > 0)
249                 return;
251         m->nomemprof++;
252         nstk = runtime_callers(1, stk, 32);
253         runtime_lock(&proflock);
254         b = stkbucket(MProf, stk, nstk, true);
255         b->recent_allocs++;
256         b->recent_alloc_bytes += size;
257         setaddrbucket((uintptr)p, b);
258         runtime_unlock(&proflock);
259         m = runtime_m();
260         m->nomemprof--;
263 // Called when freeing a profiled block.
264 void
265 runtime_MProf_Free(void *p, uintptr size)
267         M *m;
268         Bucket *b;
270         m = runtime_m();
271         if(m->nomemprof > 0)
272                 return;
274         m->nomemprof++;
275         runtime_lock(&proflock);
276         b = getaddrbucket((uintptr)p);
277         if(b != nil) {
278                 b->recent_frees++;
279                 b->recent_free_bytes += size;
280         }
281         runtime_unlock(&proflock);
282         m = runtime_m();
283         m->nomemprof--;
286 int64 runtime_blockprofilerate;  // in CPU ticks
288 void runtime_SetBlockProfileRate(intgo) asm("runtime.SetBlockProfileRate");
290 void
291 runtime_SetBlockProfileRate(intgo rate)
293         runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
296 void
297 runtime_blockevent(int64 cycles, int32 skip)
299         int32 nstk;
300         int64 rate;
301         uintptr stk[32];
302         Bucket *b;
304         if(cycles <= 0)
305                 return;
306         rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
307         if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
308                 return;
310         nstk = runtime_callers(skip, stk, 32);
311         runtime_lock(&proflock);
312         b = stkbucket(BProf, stk, nstk, true);
313         b->count++;
314         b->cycles += cycles;
315         runtime_unlock(&proflock);
318 // Go interface to profile data.  (Declared in debug.go)
320 // Must match MemProfileRecord in debug.go.
321 typedef struct Record Record;
322 struct Record {
323         int64 alloc_bytes, free_bytes;
324         int64 alloc_objects, free_objects;
325         uintptr stk[32];
328 // Write b's data to r.
329 static void
330 record(Record *r, Bucket *b)
332         uint32 i;
334         r->alloc_bytes = b->alloc_bytes;
335         r->free_bytes = b->free_bytes;
336         r->alloc_objects = b->allocs;
337         r->free_objects = b->frees;
338         for(i=0; i<b->nstk && i<nelem(r->stk); i++)
339                 r->stk[i] = b->stk[i];
340         for(; i<nelem(r->stk); i++)
341                 r->stk[i] = 0;
344 func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
345         Bucket *b;
346         Record *r;
348         runtime_lock(&proflock);
349         n = 0;
350         for(b=mbuckets; b; b=b->allnext)
351                 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
352                         n++;
353         ok = false;
354         if(n <= p.__count) {
355                 ok = true;
356                 r = (Record*)p.__values;
357                 for(b=mbuckets; b; b=b->allnext)
358                         if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
359                                 record(r++, b);
360         }
361         runtime_unlock(&proflock);
364 void
365 runtime_MProf_Mark(void (*addroot)(Obj))
367         // buckhash is not allocated via mallocgc.
368         addroot((Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
369         addroot((Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
370         addroot((Obj){(byte*)&addrhash, sizeof addrhash, 0});
371         addroot((Obj){(byte*)&addrfree, sizeof addrfree, 0});
374 // Must match BlockProfileRecord in debug.go.
375 typedef struct BRecord BRecord;
376 struct BRecord {
377         int64 count;
378         int64 cycles;
379         uintptr stk[32];
382 func BlockProfile(p Slice) (n int, ok bool) {
383         Bucket *b;
384         BRecord *r;
385         int32 i;
387         runtime_lock(&proflock);
388         n = 0;
389         for(b=bbuckets; b; b=b->allnext)
390                 n++;
391         ok = false;
392         if(n <= p.__count) {
393                 ok = true;
394                 r = (BRecord*)p.__values;
395                 for(b=bbuckets; b; b=b->allnext, r++) {
396                         r->count = b->count;
397                         r->cycles = b->cycles;
398                         for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
399                                 r->stk[i] = b->stk[i];
400                         for(; (uintptr)i<nelem(r->stk); i++)
401                                 r->stk[i] = 0;                  
402                 }
403         }
404         runtime_unlock(&proflock);
407 // Must match StackRecord in debug.go.
408 typedef struct TRecord TRecord;
409 struct TRecord {
410         uintptr stk[32];
413 func ThreadCreateProfile(p Slice) (n int, ok bool) {
414         TRecord *r;
415         M *first, *mp;
416         
417         first = runtime_atomicloadp(&runtime_allm);
418         n = 0;
419         for(mp=first; mp; mp=mp->alllink)
420                 n++;
421         ok = false;
422         if(n <= p.__count) {
423                 ok = true;
424                 r = (TRecord*)p.__values;
425                 for(mp=first; mp; mp=mp->alllink) {
426                         runtime_memmove(r->stk, mp->createstack, sizeof r->stk);
427                         r++;
428                 }
429         }
432 func Stack(b Slice, all bool) (n int) {
433         byte *pc, *sp;
434         bool enablegc;
435         
436         sp = runtime_getcallersp(&b);
437         pc = runtime_getcallerpc(&b);
439         if(all) {
440                 runtime_semacquire(&runtime_worldsema);
441                 runtime_m()->gcing = 1;
442                 runtime_stoptheworld();
443                 enablegc = mstats.enablegc;
444                 mstats.enablegc = false;
445         }
447         if(b.__count == 0)
448                 n = 0;
449         else{
450                 G* g = runtime_g();
451                 g->writebuf = (byte*)b.__values;
452                 g->writenbuf = b.__count;
453                 USED(pc);
454                 USED(sp);
455                 runtime_goroutineheader(g);
456                 runtime_traceback();
457                 runtime_goroutinetrailer(g);
458                 if(all)
459                         runtime_tracebackothers(g);
460                 n = b.__count - g->writenbuf;
461                 g->writebuf = nil;
462                 g->writenbuf = 0;
463         }
464         
465         if(all) {
466                 runtime_m()->gcing = 0;
467                 mstats.enablegc = enablegc;
468                 runtime_semrelease(&runtime_worldsema);
469                 runtime_starttheworld();
470         }
473 static void
474 saveg(G *gp, TRecord *r)
476         int32 n;
478         if(gp == runtime_g())
479                 n = runtime_callers(0, r->stk, nelem(r->stk));
480         else {
481                 // FIXME: Not implemented.
482                 n = 0;
483         }
484         if((size_t)n < nelem(r->stk))
485                 r->stk[n] = 0;
488 func GoroutineProfile(b Slice) (n int, ok bool) {
489         TRecord *r;
490         G *gp;
491         
492         ok = false;
493         n = runtime_gcount();
494         if(n <= b.__count) {
495                 runtime_semacquire(&runtime_worldsema);
496                 runtime_m()->gcing = 1;
497                 runtime_stoptheworld();
499                 n = runtime_gcount();
500                 if(n <= b.__count) {
501                         G* g = runtime_g();
502                         ok = true;
503                         r = (TRecord*)b.__values;
504                         saveg(g, r++);
505                         for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
506                                 if(gp == g || gp->status == Gdead)
507                                         continue;
508                                 saveg(gp, r++);
509                         }
510                 }
511         
512                 runtime_m()->gcing = 0;
513                 runtime_semrelease(&runtime_worldsema);
514                 runtime_starttheworld();
515         }