1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
6 // Patterned after tcmalloc's algorithms; shorter code.
14 typedef struct __go_open_array Slice;
16 // NOTE(rsc): Everything here could use cas if contention became an issue.
19 // Per-call-stack allocation information.
20 // Lookup by hashing call stack into a linked-list hash table.
21 typedef struct Bucket Bucket;
24 Bucket *next; // next in hash list
25 Bucket *allnext; // next in list of all buckets
35 BuckHashSize = 179999,
37 static Bucket **buckhash;
38 static Bucket *buckets;
39 static uintptr bucketmem;
41 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
43 stkbucket(uintptr *stk, int32 nstk)
50 buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
51 mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
56 for(i=0; i<nstk; i++) {
65 for(b = buckhash[i]; b; b=b->next)
66 if(b->hash == h && b->nstk == (uintptr)nstk &&
67 runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
70 b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
71 bucketmem += sizeof *b + nstk*sizeof stk[0];
72 runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
75 b->next = buckhash[i];
82 // Map from pointer to Bucket* that allocated it.
84 // Linked-list hash table for top N-20 bits.
85 // Array index for next 13 bits.
86 // Linked list for next 7 bits.
87 // This is more efficient than using a general map,
88 // because of the typical clustering of the pointer keys.
90 typedef struct AddrHash AddrHash;
91 typedef struct AddrEntry AddrEntry;
95 AddrHash *next; // next in top-level hash table linked list
96 uintptr addr; // addr>>20
97 AddrEntry *dense[1<<13];
102 AddrEntry *next; // next in bottom-level linked list
108 AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space
110 static AddrHash *addrhash[1<<AddrHashBits];
111 static AddrEntry *addrfree;
112 static uintptr addrmem;
114 // Multiplicative hash function:
115 // HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
116 // This is a good multiplier as suggested in CLR, Knuth. The hash
117 // value is taken to be the top AddrHashBits bits of the bottom 32 bits
118 // of the multiplied value.
120 HashMultiplier = 2654435769U
123 // Set the bucket associated with addr to b.
125 setaddrbucket(uintptr addr, Bucket *b)
132 h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
133 for(ah=addrhash[h]; ah; ah=ah->next)
134 if(ah->addr == (addr>>20))
137 ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
138 addrmem += sizeof *ah;
139 ah->next = addrhash[h];
144 if((e = addrfree) == nil) {
145 e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
146 addrmem += 64*sizeof *e;
147 for(i=0; i+1<64; i++)
152 e->addr = (uint32)~(addr & ((1<<20)-1));
154 h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
155 e->next = ah->dense[h];
159 // Get the bucket associated with addr and clear the association.
161 getaddrbucket(uintptr addr)
168 h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
169 for(ah=addrhash[h]; ah; ah=ah->next)
170 if(ah->addr == (addr>>20))
175 h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20.
176 for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
177 if(e->addr == (uint32)~(addr & ((1<<20)-1))) {
191 runtime_initlock(&proflock);
194 // Called by malloc to record a profiled block.
196 runtime_MProf_Malloc(void *p, uintptr size)
202 if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
205 nstk = runtime_callers(1, stk, 32);
209 runtime_lock(&proflock);
210 b = stkbucket(stk, nstk);
212 b->alloc_bytes += size;
213 setaddrbucket((uintptr)p, b);
214 runtime_unlock(&proflock);
215 __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
217 if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
218 __go_run_goroutine_gc(100);
221 // Called when freeing a profiled block.
223 runtime_MProf_Free(void *p, uintptr size)
227 if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
230 runtime_lock(&proflock);
231 b = getaddrbucket((uintptr)p);
234 b->free_bytes += size;
236 runtime_unlock(&proflock);
237 __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
239 if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
240 __go_run_goroutine_gc(101);
244 // Go interface to profile data. (Declared in extern.go)
245 // Assumes Go sizeof(int) == sizeof(int32)
247 // Must match MemProfileRecord in extern.go.
248 typedef struct Record Record;
250 int64 alloc_bytes, free_bytes;
251 int64 alloc_objects, free_objects;
255 // Write b's data to r.
257 record(Record *r, Bucket *b)
261 r->alloc_bytes = b->alloc_bytes;
262 r->free_bytes = b->free_bytes;
263 r->alloc_objects = b->allocs;
264 r->free_objects = b->frees;
265 for(i=0; i<b->nstk && i<nelem(r->stk); i++)
266 r->stk[i] = b->stk[i];
267 for(; i<nelem(r->stk); i++)
271 func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
275 __sync_bool_compare_and_swap(&m->nomemprof, 0, 1);
277 runtime_lock(&proflock);
279 for(b=buckets; b; b=b->allnext)
280 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
285 r = (Record*)p.__values;
286 for(b=buckets; b; b=b->allnext)
287 if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
290 runtime_unlock(&proflock);
292 __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
294 if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
295 __go_run_goroutine_gc(102);
299 runtime_MProf_Mark(void (*scan)(byte *, int64))
301 // buckhash is not allocated via mallocgc.
302 scan((byte*)&buckets, sizeof buckets);
303 scan((byte*)&addrhash, sizeof addrhash);
304 scan((byte*)&addrfree, sizeof addrfree);