Update gcc/ChangeLog for r174861.
[official-gcc.git] / libgo / runtime / mprof.goc
blob2e147edda02f1d9a0fd58ea5e6f5c017a4fa3281
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Malloc profiling.
6 // Patterned after tcmalloc's algorithms; shorter code.
8 package runtime
9 #include "runtime.h"
10 #include "malloc.h"
11 #include "defs.h"
12 #include "go-type.h"
14 typedef struct __go_open_array Slice;
16 // NOTE(rsc): Everything here could use cas if contention became an issue.
17 static Lock proflock;
19 // Per-call-stack allocation information.
20 // Lookup by hashing call stack into a linked-list hash table.
21 typedef struct Bucket Bucket;
22 struct Bucket
24         Bucket  *next;  // next in hash list
25         Bucket  *allnext;       // next in list of all buckets
26         uintptr allocs;
27         uintptr frees;
28         uintptr alloc_bytes;
29         uintptr free_bytes;
30         uintptr hash;
31         uintptr nstk;
32         uintptr stk[1];
34 enum {
35         BuckHashSize = 179999,
37 static Bucket **buckhash;
38 static Bucket *buckets;
39 static uintptr bucketmem;
41 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
42 static Bucket*
43 stkbucket(uintptr *stk, int32 nstk)
45         int32 i;
46         uintptr h;
47         Bucket *b;
49         if(buckhash == nil) {
50                 buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
51                 mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
52         }
54         // Hash stack.
55         h = 0;
56         for(i=0; i<nstk; i++) {
57                 h += stk[i];
58                 h += h<<10;
59                 h ^= h>>6;
60         }
61         h += h<<3;
62         h ^= h>>11;
64         i = h%BuckHashSize;
65         for(b = buckhash[i]; b; b=b->next)
66                 if(b->hash == h && b->nstk == (uintptr)nstk &&
67                    runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
68                         return b;
70         b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
71         bucketmem += sizeof *b + nstk*sizeof stk[0];
72         runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
73         b->hash = h;
74         b->nstk = nstk;
75         b->next = buckhash[i];
76         buckhash[i] = b;
77         b->allnext = buckets;
78         buckets = b;
79         return b;
82 // Map from pointer to Bucket* that allocated it.
83 // Three levels:
84 //      Linked-list hash table for top N-20 bits.
85 //      Array index for next 13 bits.
86 //      Linked list for next 7 bits.
87 // This is more efficient than using a general map,
88 // because of the typical clustering of the pointer keys.
90 typedef struct AddrHash AddrHash;
91 typedef struct AddrEntry AddrEntry;
93 struct AddrHash
95         AddrHash *next; // next in top-level hash table linked list
96         uintptr addr;   // addr>>20
97         AddrEntry *dense[1<<13];
100 struct AddrEntry
102         AddrEntry *next;        // next in bottom-level linked list
103         uint32 addr;
104         Bucket *b;
107 enum {
108         AddrHashBits = 12       // 1MB per entry, so good for 4GB of used address space
110 static AddrHash *addrhash[1<<AddrHashBits];
111 static AddrEntry *addrfree;
112 static uintptr addrmem;
114 // Multiplicative hash function:
115 // hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
116 // This is a good multiplier as suggested in CLR, Knuth.  The hash
117 // value is taken to be the top AddrHashBits bits of the bottom 32 bits
118 // of the multiplied value.
enum {
	HashMultiplier = 2654435769U	// 0x9E3779B9
};
123 // Set the bucket associated with addr to b.
124 static void
125 setaddrbucket(uintptr addr, Bucket *b)
127         int32 i;
128         uint32 h;
129         AddrHash *ah;
130         AddrEntry *e;
132         h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
133         for(ah=addrhash[h]; ah; ah=ah->next)
134                 if(ah->addr == (addr>>20))
135                         goto found;
137         ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
138         addrmem += sizeof *ah;
139         ah->next = addrhash[h];
140         ah->addr = addr>>20;
141         addrhash[h] = ah;
143 found:
144         if((e = addrfree) == nil) {
145                 e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
146                 addrmem += 64*sizeof *e;
147                 for(i=0; i+1<64; i++)
148                         e[i].next = &e[i+1];
149                 e[63].next = nil;
150         }
151         addrfree = e->next;
152         e->addr = (uint32)~(addr & ((1<<20)-1));
153         e->b = b;
154         h = (addr>>7)&(nelem(ah->dense)-1);     // entry in dense is top 13 bits of low 20.
155         e->next = ah->dense[h];
156         ah->dense[h] = e;
159 // Get the bucket associated with addr and clear the association.
160 static Bucket*
161 getaddrbucket(uintptr addr)
163         uint32 h;
164         AddrHash *ah;
165         AddrEntry *e, **l;
166         Bucket *b;
168         h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
169         for(ah=addrhash[h]; ah; ah=ah->next)
170                 if(ah->addr == (addr>>20))
171                         goto found;
172         return nil;
174 found:
175         h = (addr>>7)&(nelem(ah->dense)-1);     // entry in dense is top 13 bits of low 20.
176         for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
177                 if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
178                         *l = e->next;
179                         b = e->b;
180                         e->next = addrfree;
181                         addrfree = e;
182                         return b;
183                 }
184         }
185         return nil;
188 void
189 runtime_Mprof_Init()
191         runtime_initlock(&proflock);
194 // Called by malloc to record a profiled block.
195 void
196 runtime_MProf_Malloc(void *p, uintptr size)
198         int32 nstk;
199         uintptr stk[32];
200         Bucket *b;
202         if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
203                 return;
204 #if 0
205         nstk = runtime_callers(1, stk, 32);
206 #else
207         nstk = 0;
208 #endif
209         runtime_lock(&proflock);
210         b = stkbucket(stk, nstk);
211         b->allocs++;
212         b->alloc_bytes += size;
213         setaddrbucket((uintptr)p, b);
214         runtime_unlock(&proflock);
215         __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
217         if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
218                 __go_run_goroutine_gc(100);
221 // Called when freeing a profiled block.
222 void
223 runtime_MProf_Free(void *p, uintptr size)
225         Bucket *b;
227         if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
228                 return;
230         runtime_lock(&proflock);
231         b = getaddrbucket((uintptr)p);
232         if(b != nil) {
233                 b->frees++;
234                 b->free_bytes += size;
235         }
236         runtime_unlock(&proflock);
237         __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
239         if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
240                 __go_run_goroutine_gc(101);
244 // Go interface to profile data.  (Declared in extern.go)
245 // Assumes Go sizeof(int) == sizeof(int32)
247 // Must match MemProfileRecord in extern.go.
248 typedef struct Record Record;
249 struct Record {
250         int64 alloc_bytes, free_bytes;
251         int64 alloc_objects, free_objects;
252         uintptr stk[32];
255 // Write b's data to r.
256 static void
257 record(Record *r, Bucket *b)
259         uint32 i;
261         r->alloc_bytes = b->alloc_bytes;
262         r->free_bytes = b->free_bytes;
263         r->alloc_objects = b->allocs;
264         r->free_objects = b->frees;
265         for(i=0; i<b->nstk && i<nelem(r->stk); i++)
266                 r->stk[i] = b->stk[i];
267         for(; i<nelem(r->stk); i++)
268                 r->stk[i] = 0;
271 func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
272         Bucket *b;
273         Record *r;
275         __sync_bool_compare_and_swap(&m->nomemprof, 0, 1);
277         runtime_lock(&proflock);
278         n = 0;
279         for(b=buckets; b; b=b->allnext)
280                 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
281                         n++;
282         ok = false;
283         if(n <= p.__count) {
284                 ok = true;
285                 r = (Record*)p.__values;
286                 for(b=buckets; b; b=b->allnext)
287                         if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
288                                 record(r++, b);
289         }
290         runtime_unlock(&proflock);
292         __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
294         if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
295                 __go_run_goroutine_gc(102);
298 void
299 runtime_MProf_Mark(void (*scan)(byte *, int64))
301         // buckhash is not allocated via mallocgc.
302         scan((byte*)&buckets, sizeof buckets);
303         scan((byte*)&addrhash, sizeof addrhash);
304         scan((byte*)&addrfree, sizeof addrfree);