libgo/runtime/mprof.goc
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
struct Bucket
{
        Bucket  *next;  // next in hash list
        Bucket  *allnext;       // next in list of all mbuckets/bbuckets
        int32   typ;
        // Generally unions can break precise GC, but this one is fine
        // because it does not contain pointers.
        union
        {
                struct  // typ == MProf
                {
                        // The following complex 3-stage scheme of stats accumulation
                        // is required to obtain a consistent picture of mallocs and frees
                        // for some point in time.
                        // The problem is that mallocs come in real time, while frees
                        // come only after a GC during concurrent sweeping. So if we
                        // counted them naively, we would get a skew toward mallocs.
                        //
                        // Mallocs are accounted in recent stats.
                        // Explicit frees are accounted in recent stats.
                        // GC frees are accounted in prev stats.
                        // After GC, prev stats are added to final stats and
                        // recent stats are moved into prev stats.
                        uintptr allocs;
                        uintptr frees;
                        uintptr alloc_bytes;
                        uintptr free_bytes;

                        uintptr prev_allocs;  // since last-but-one gc till last gc
                        uintptr prev_frees;
                        uintptr prev_alloc_bytes;
                        uintptr prev_free_bytes;

                        uintptr recent_allocs;  // since last gc till now
                        uintptr recent_frees;
                        uintptr recent_alloc_bytes;
                        uintptr recent_free_bytes;
                };
                struct  // typ == BProf
                {
                        int64   count;
                        int64   cycles;
                };
        };
        uintptr hash;   // hash of size + stk
        uintptr size;
        uintptr nstk;
        Location stk[1];
};
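
// An informal illustration of the 3-stage scheme described above, using
// hypothetical counts rather than data from a real run: suppose a bucket
// records 10 mallocs before the next GC, and sweeping after that GC frees
// 4 of those objects.
//
//      while running:  recent_allocs = 10
//      at GC #1:       prev_allocs = 10, recent_allocs = 0
//      during sweep:   prev_frees = 4       (GC frees land in prev stats)
//      at GC #2:       allocs += 10, frees += 4; prev_* refilled from recent_*
//
// By the time counts reach the final allocs/frees fields, the mallocs and the
// frees belonging to the same period arrive together, avoiding the skew
// toward mallocs mentioned above.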

enum {
        BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr size, Location *stk, int32 nstk, bool alloc)
{
        int32 i, j;
        uintptr h;
        Bucket *b;

        if(buckhash == nil) {
                buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
                if(buckhash == nil)
                        runtime_throw("runtime: cannot allocate memory");
        }

        // Hash stack.
        h = 0;
        for(i=0; i<nstk; i++) {
                h += stk[i].pc;
                h += h<<10;
                h ^= h>>6;
        }
        // hash in size
        h += size;
        h += h<<10;
        h ^= h>>6;
        // finalize
        h += h<<3;
        h ^= h>>11;

        i = h%BuckHashSize;
        for(b = buckhash[i]; b; b=b->next) {
                if(b->typ == typ && b->hash == h && b->size == size && b->nstk == (uintptr)nstk) {
                        for(j = 0; j < nstk; j++) {
                                if(b->stk[j].pc != stk[j].pc ||
                                   b->stk[j].lineno != stk[j].lineno ||
                                   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
                                        break;
                        }
                        if (j == nstk)
                                return b;
                }
        }

        if(!alloc)
                return nil;

        b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
        bucketmem += sizeof *b + nstk*sizeof stk[0];
        runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
        b->typ = typ;
        b->hash = h;
        b->size = size;
        b->nstk = nstk;
        b->next = buckhash[i];
        buckhash[i] = b;
        if(typ == MProf) {
                b->allnext = mbuckets;
                mbuckets = b;
        } else {
                b->allnext = bbuckets;
                bbuckets = b;
        }
        return b;
}
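
// Note on the hashing above (descriptive only; the scheme is unchanged):
// each frame's pc and then the size are folded in with one-at-a-time-style
// mixing (h += x; h += h<<10; h ^= h>>6), followed by a final avalanche
// (h += h<<3; h ^= h>>11). The hash is only a fast filter: the inner loop
// still compares pc, lineno and filename, since distinct stacks can collide
// on the same h.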

static void
MProf_GC(void)
{
        Bucket *b;

        for(b=mbuckets; b; b=b->allnext) {
                b->allocs += b->prev_allocs;
                b->frees += b->prev_frees;
                b->alloc_bytes += b->prev_alloc_bytes;
                b->free_bytes += b->prev_free_bytes;

                b->prev_allocs = b->recent_allocs;
                b->prev_frees = b->recent_frees;
                b->prev_alloc_bytes = b->recent_alloc_bytes;
                b->prev_free_bytes = b->recent_free_bytes;

                b->recent_allocs = 0;
                b->recent_frees = 0;
                b->recent_alloc_bytes = 0;
                b->recent_free_bytes = 0;
        }
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
        runtime_lock(&proflock);
        MProf_GC();
        runtime_unlock(&proflock);
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
        Location stk[32];
        Bucket *b;
        int32 nstk;

        nstk = runtime_callers(1, stk, nelem(stk), false);
        runtime_lock(&proflock);
        b = stkbucket(MProf, size, stk, nstk, true);
        b->recent_allocs++;
        b->recent_alloc_bytes += size;
        runtime_unlock(&proflock);

        // Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
        // This reduces potential contention and chances of deadlocks.
        // Since the object must be alive during the call to MProf_Malloc,
        // it's fine to do this non-atomically.
        runtime_setprofilebucket(p, b);
}

// Called when freeing a profiled block.
void
runtime_MProf_Free(Bucket *b, uintptr size, bool freed)
{
        runtime_lock(&proflock);
        if(freed) {
                b->recent_frees++;
                b->recent_free_bytes += size;
        } else {
                b->prev_frees++;
                b->prev_free_bytes += size;
        }
        runtime_unlock(&proflock);
}

int64 runtime_blockprofilerate;  // in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

void
runtime_SetBlockProfileRate(intgo rate)
{
        int64 r;

        if(rate <= 0)
                r = 0;  // disable profiling
        else {
                // convert ns to cycles, use float64 to prevent overflow during multiplication
                r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
                if(r == 0)
                        r = 1;
        }
        runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
}
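
// Worked example of the conversion above, with assumed (not measured)
// numbers: for rate = 10000ns on a machine where runtime_tickspersecond()
// reports 2.5e9 ticks per second, r = 10000 * 2.5e9 / 1e9 = 25000 cycles.
// Doing the multiplication in float64 keeps a large rate from overflowing
// int64, and the r == 0 check keeps a tiny nonzero rate from silently
// disabling the profile.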

void
runtime_blockevent(int64 cycles, int32 skip)
{
        int32 nstk;
        int64 rate;
        Location stk[32];
        Bucket *b;

        if(cycles <= 0)
                return;
        rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
        if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
                return;

        nstk = runtime_callers(skip, stk, nelem(stk), false);
        runtime_lock(&proflock);
        b = stkbucket(BProf, 0, stk, nstk, true);
        b->count++;
        b->cycles += cycles;
        runtime_unlock(&proflock);
}
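
// Informal reading of the sampling check above (no new behavior implied):
// an event that blocked for at least `rate' cycles is always recorded, while
// a shorter one is kept only when runtime_fastrand1()%rate <= cycles, i.e.
// with probability roughly cycles/rate. For example, with rate = 1e6 cycles,
// an event that blocked for 1e4 cycles is recorded about 1% of the time.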

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
        int64 alloc_bytes, free_bytes;
        int64 alloc_objects, free_objects;
        uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
        uint32 i;

        r->alloc_bytes = b->alloc_bytes;
        r->free_bytes = b->free_bytes;
        r->alloc_objects = b->allocs;
        r->free_objects = b->frees;
        for(i=0; i<b->nstk && i<nelem(r->stk); i++)
                r->stk[i] = b->stk[i].pc;
        for(; i<nelem(r->stk); i++)
                r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
        Bucket *b;
        Record *r;
        bool clear;

        runtime_lock(&proflock);
        n = 0;
        clear = true;
        for(b=mbuckets; b; b=b->allnext) {
                if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
                        n++;
                if(b->allocs != 0 || b->frees != 0)
                        clear = false;
        }
        if(clear) {
                // Absolutely no data, suggesting that a garbage collection
                // has not yet happened. In order to allow profiling when
                // garbage collection is disabled from the beginning of execution,
                // accumulate stats as if a GC just happened, and recount buckets.
                MProf_GC();
                MProf_GC();
                n = 0;
                for(b=mbuckets; b; b=b->allnext)
                        if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
                                n++;
        }
        ok = false;
        if(n <= p.__count) {
                ok = true;
                r = (Record*)p.__values;
                for(b=mbuckets; b; b=b->allnext)
                        if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
                                record(r++, b);
        }
        runtime_unlock(&proflock);
}
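
// Why MProf_GC() is called twice above: one rotation only promotes recent
// stats to prev; a second is needed before they land in the final
// allocs/frees fields that record() reports. Informal trace with a
// hypothetical count: recent_allocs=5 -> first call: prev_allocs=5,
// recent_allocs=0 -> second call: allocs+=5. Without the double flush, a
// profile taken before the first real GC would look empty.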

void
runtime_MProf_Mark(struct Workbuf **wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
        // buckhash is not allocated via mallocgc.
        enqueue1(wbufp, (Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
        enqueue1(wbufp, (Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
}

void
runtime_iterate_memprof(void (*callback)(Bucket*, uintptr, Location*, uintptr, uintptr, uintptr))
{
        Bucket *b;

        runtime_lock(&proflock);
        for(b=mbuckets; b; b=b->allnext) {
                callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees);
        }
        runtime_unlock(&proflock);
}
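
// A minimal sketch of a callback with the signature runtime_iterate_memprof
// expects, with parameters named after the arguments passed above. The name
// dump_bucket is hypothetical and for illustration only; it is not part of
// the runtime.
//
//      static void
//      dump_bucket(Bucket *b, uintptr nstk, Location *stk, uintptr size,
//                  uintptr allocs, uintptr frees)
//      {
//              USED(b);
//              USED(stk);
//              runtime_printf("size=%D allocs=%D frees=%D frames=%D\n",
//                      (int64)size, (int64)allocs, (int64)frees, (int64)nstk);
//      }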

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
        int64 count;
        int64 cycles;
        uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
        Bucket *b;
        BRecord *r;
        int32 i;

        runtime_lock(&proflock);
        n = 0;
        for(b=bbuckets; b; b=b->allnext)
                n++;
        ok = false;
        if(n <= p.__count) {
                ok = true;
                r = (BRecord*)p.__values;
                for(b=bbuckets; b; b=b->allnext, r++) {
                        r->count = b->count;
                        r->cycles = b->cycles;
                        for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
                                r->stk[i] = b->stk[i].pc;
                        for(; (uintptr)i<nelem(r->stk); i++)
                                r->stk[i] = 0;
                }
        }
        runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
        uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
        TRecord *r;
        M *first, *mp;
        int32 i;

        first = runtime_atomicloadp(&runtime_allm);
        n = 0;
        for(mp=first; mp; mp=mp->alllink)
                n++;
        ok = false;
        if(n <= p.__count) {
                ok = true;
                r = (TRecord*)p.__values;
                for(mp=first; mp; mp=mp->alllink) {
                        for(i = 0; (uintptr)i < nelem(r->stk); i++) {
                                r->stk[i] = mp->createstack[i].pc;
                        }
                        r++;
                }
        }
}

func Stack(b Slice, all bool) (n int) {
        byte *pc, *sp;
        bool enablegc;

        sp = runtime_getcallersp(&b);
        pc = (byte*)(uintptr)runtime_getcallerpc(&b);

        if(all) {
                runtime_semacquire(&runtime_worldsema, false);
                runtime_m()->gcing = 1;
                runtime_stoptheworld();
                enablegc = mstats.enablegc;
                mstats.enablegc = false;
        }

        if(b.__count == 0)
                n = 0;
        else {
                G* g = runtime_g();
                g->writebuf = (byte*)b.__values;
                g->writenbuf = b.__count;
                USED(pc);
                USED(sp);
                runtime_goroutineheader(g);
                runtime_traceback();
                runtime_printcreatedby(g);
                if(all)
                        runtime_tracebackothers(g);
                n = b.__count - g->writenbuf;
                g->writebuf = nil;
                g->writenbuf = 0;
        }

        if(all) {
                runtime_m()->gcing = 0;
                mstats.enablegc = enablegc;
                runtime_semrelease(&runtime_worldsema);
                runtime_starttheworld();
        }
}
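
// Note on the mechanism above (descriptive only): Stack captures its output
// by pointing g->writebuf/g->writenbuf at the caller's slice, so the normal
// traceback printing routines write into that buffer instead of stderr, and
// n is the number of bytes consumed. With all=true the world is stopped so
// that other goroutines' stacks stay stable while runtime_tracebackothers
// walks them.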

static void
saveg(G *gp, TRecord *r)
{
        int32 n, i;
        Location locstk[nelem(r->stk)];

        if(gp == runtime_g()) {
                n = runtime_callers(0, locstk, nelem(r->stk), false);
                for(i = 0; i < n; i++)
                        r->stk[i] = locstk[i].pc;
        }
        else {
                // FIXME: Not implemented.
                n = 0;
        }
        if((size_t)n < nelem(r->stk))
                r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
        uintptr i;
        TRecord *r;
        G *gp;

        ok = false;
        n = runtime_gcount();
        if(n <= b.__count) {
                runtime_semacquire(&runtime_worldsema, false);
                runtime_m()->gcing = 1;
                runtime_stoptheworld();

                n = runtime_gcount();
                if(n <= b.__count) {
                        G* g = runtime_g();
                        ok = true;
                        r = (TRecord*)b.__values;
                        saveg(g, r++);
                        for(i = 0; i < runtime_allglen; i++) {
                                gp = runtime_allg[i];
                                if(gp == g || gp->status == Gdead)
                                        continue;
                                saveg(gp, r++);
                        }
                }

                runtime_m()->gcing = 0;
                runtime_semrelease(&runtime_worldsema);
                runtime_starttheworld();
        }
}

// Tracing of alloc/free/gc.

static Lock tracelock;

static const char*
typeinfoname(int32 typeinfo)
{
        if(typeinfo == TypeInfo_SingleObject)
                return "single object";
        else if(typeinfo == TypeInfo_Array)
                return "array";
        else if(typeinfo == TypeInfo_Chan)
                return "channel";
        runtime_throw("typeinfoname: unknown type info");
        return nil;
}

void
runtime_tracealloc(void *p, uintptr size, uintptr typ)
{
        const char *name;
        Type *type;

        runtime_lock(&tracelock);
        runtime_m()->traceback = 2;
        type = (Type*)(typ & ~3);
        name = typeinfoname(typ & 3);
        if(type == nil)
                runtime_printf("tracealloc(%p, %p, %s)\n", p, size, name);
        else
                runtime_printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->__reflection);
        if(runtime_m()->curg == nil || runtime_g() == runtime_m()->curg) {
                runtime_goroutineheader(runtime_g());
                runtime_traceback();
        } else {
                runtime_goroutineheader(runtime_m()->curg);
                runtime_traceback();
        }
        runtime_printf("\n");
        runtime_m()->traceback = 0;
        runtime_unlock(&tracelock);
}
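
// How the typ argument above is unpacked (a description of the two lines
// that decode it, not a new convention): the caller packs a Type* together
// with a TypeInfo_* kind in the low two bits, so (typ & ~3) recovers the
// pointer and (typ & 3) selects "single object", "array" or "channel" via
// typeinfoname. For example, a value built as (uintptr)t | TypeInfo_Array
// would print "array of" followed by t's reflection string.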

void
runtime_tracefree(void *p, uintptr size)
{
        runtime_lock(&tracelock);
        runtime_m()->traceback = 2;
        runtime_printf("tracefree(%p, %p)\n", p, size);
        runtime_goroutineheader(runtime_g());
        runtime_traceback();
        runtime_printf("\n");
        runtime_m()->traceback = 0;
        runtime_unlock(&tracelock);
}

void
runtime_tracegc(void)
{
        runtime_lock(&tracelock);
        runtime_m()->traceback = 2;
        runtime_printf("tracegc()\n");
        // running on m->g0 stack; show all non-g0 goroutines
        runtime_tracebackothers(runtime_g());
        runtime_printf("end tracegc\n");
        runtime_printf("\n");
        runtime_m()->traceback = 0;
        runtime_unlock(&tracelock);
}