// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// this one is fine because it does not contain pointers.
	union
	{
		struct	// typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;	// since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct	// typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	Location stk[1];
};
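
// BuckHashSize is prime (179999), which helps the h%BuckHashSize index
// below spread clustered stack hashes evenly across the table.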
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
{
	int32 i, j;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
		if(buckhash == nil)
			runtime_throw("runtime: cannot allocate memory");
	}

	// Hash stack.
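	// (A shift-add-xor mix in the style of Jenkins's one-at-a-time hash;
	// the two steps after the loop avalanche the bits before the bucket
	// index is taken.)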
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i].pc;
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next) {
		if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk) {
			for(j = 0; j < nstk; j++) {
				if(b->stk[j].pc != stk[j].pc ||
				   b->stk[j].lineno != stk[j].lineno ||
				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
					break;
			}
			if (j == nstk)
				return b;
		}
	}

	if(!alloc)
		return nil;

	b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}
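
// Fold the "recent" (since the last GC) statistics into the committed
// totals. Keeping the two sets separate lets MemProfile report numbers as
// of the last collection, so objects the collector may still free do not
// inflate the in-use counts.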
static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	runtime_lock(&proflock);
	MProf_GC();
	runtime_unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};
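
// Concretely, with the constants above a given addr is split as follows
// (a sketch derived from setaddrbucket/getaddrbucket below):
//	addr >> AddrHashShift                          -- multiplicatively hashed; picks an addrhash chain
//	(addr >> (AddrHashShift-AddrDenseBits)) & 0xff -- top 8 of the low 20 bits; indexes ah->dense
//	addr & ((1<<AddrHashShift)-1)                  -- full low 20 bits, stored complemented in the AddrEntry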

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
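
// Worked example: (sqrt(5)-1)/2 = 0.6180339887..., and
// 0.6180339887... * 2^32 = 2654435769.497..., which truncates to
// 2654435769 (0x9E3779B9).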

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime_persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = runtime_persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
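	// (The low bits are stored complemented -- presumably so that a
	// zeroed AddrEntry can never be mistaken for a live address.)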
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	Location stk[32];
	Bucket *b;

	nstk = runtime_callers(1, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime_unlock(&proflock);
}

// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	runtime_lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime_unlock(&proflock);
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;  // disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
}
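
// Worked example, assuming a 2.5GHz tick rate: SetBlockProfileRate(10000)
// stores r = 10000 * 2.5e9 / 1e9 = 25000 ticks. If the product truncates
// to 0, r is forced up to 1 so that profiling stays enabled.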

void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	Location stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
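	// Sampling: an event that blocked for at least "rate" ticks is always
	// recorded; a shorter one is kept with probability roughly cycles/rate,
	// so frequent short blocks are subsampled.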
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uint32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i].pc;
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime_lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}
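
// Go-side usage sketch (mirroring the retry loop the runtime package
// documentation suggests for MemProfile):
//
//	var p []runtime.MemProfileRecord
//	n, ok := runtime.MemProfile(nil, true)
//	for !ok {
//		p = make([]runtime.MemProfileRecord, n+50)
//		n, ok = runtime.MemProfile(p, true)
//	}
//	p = p[:n]  // one record per sampled allocation site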

void
runtime_MProf_Mark(void (*addroot)(Obj))
{
	// buckhash is not allocated via mallocgc.
	addroot((Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
	addroot((Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
	addroot((Obj){(byte*)&addrhash, sizeof addrhash, 0});
	addroot((Obj){(byte*)&addrfree, sizeof addrfree, 0});
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i].pc;
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}
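
// The Go-side calling convention matches MemProfile: size the slice, then
// retry until ok. A sketch (not the only valid pattern):
//
//	n, _ := runtime.BlockProfile(nil)
//	p := make([]runtime.BlockProfileRecord, n+50)
//	n, ok := runtime.BlockProfile(p)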

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;
	int32 i;

	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(mp=first; mp; mp=mp->alllink) {
			for(i = 0; (uintptr)i < nelem(r->stk); i++) {
				r->stk[i] = mp->createstack[i].pc;
			}
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;
	bool enablegc;

	sp = runtime_getcallersp(&b);
	pc = (byte*)(uintptr)runtime_getcallerpc(&b);

	if(all) {
		runtime_semacquire(&runtime_worldsema, false);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
		enablegc = mstats.enablegc;
		mstats.enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else {
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		USED(pc);
		USED(sp);
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_printcreatedby(g);
		if(all)
			runtime_tracebackothers(g);
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		runtime_m()->gcing = 0;
		mstats.enablegc = enablegc;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}
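
// Go-side sketch: the caller supplies a byte buffer and gets back the number
// of bytes written (the trace is truncated if the buffer is too small):
//
//	buf := make([]byte, 1<<16)
//	n := runtime.Stack(buf, true) // all=true also traces other goroutines
//	os.Stderr.Write(buf[:n])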

static void
saveg(G *gp, TRecord *r)
{
	int32 n, i;
	Location locstk[nelem(r->stk)];

	if(gp == runtime_g()) {
		n = runtime_callers(0, locstk, nelem(r->stk));
		for(i = 0; i < n; i++)
			r->stk[i] = locstk[i].pc;
	}
	else {
		// FIXME: Not implemented.
		n = 0;
	}
	if((size_t)n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	TRecord *r;
	G *gp;

	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_semacquire(&runtime_worldsema, false);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			saveg(g, r++);
			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

void
runtime_mprofinit(void)
{
	addrhash = runtime_persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys);
}