/* frame_malloc.c                  -*-C-*-
 *
 *************************************************************************
 *
 * @copyright
 * Copyright (C) 2009-2013, Intel Corporation
 * All rights reserved.
 *
 * @copyright
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * @copyright
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/

#include "frame_malloc.h"
#include "bug.h"
#include "local_state.h"
#include "cilk_malloc.h"

#ifndef __VXWORKS__
#include <memory.h>
#endif

/* #define USE_MMAP 1 */
#if USE_MMAP
#define __USE_MISC 1
#include <sys/mman.h>
#include <errno.h>
#endif

// Define to fill the stack frame header with the fill character when pushing
// it on a free list.  Note that this should be #ifdef'd out when checked in!

#ifdef _DEBUG
#define HEADER_FILL_CHAR 0xbf
#endif

// HEADER_FILL_CHAR should not be defined when checked in, so put out a warning
// message if this is a release build

#if defined(NDEBUG) && defined(HEADER_FILL_CHAR)
#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
#endif

static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size);

#ifndef _WIN32

const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] =
{
    64, 128, 256, 512, 1024, 2048
};

#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE 2048

#else // _WIN32

/* Note that this must match the implementation of framesz_to_bucket in
 * asmilator/layout.ml! */
#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE \
    FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)

#endif // _WIN32
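
/* For example: a request is served from the smallest bucket that fits,
   so a 96-byte frame comes from the 128-byte (bucket 1) free list and a
   129-byte frame from the 256-byte (bucket 2) free list; anything larger
   than FRAME_MALLOC_MAX_SIZE skips the buckets entirely.  See
   bucket_of_size() and __cilkrts_frame_malloc() below. */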

/* utility procedures */
static void push(struct free_list **b, struct free_list *p)
{
#ifdef HEADER_FILL_CHAR
    memset(p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0));
#endif
    /* cons! onto free list */
    p->cdr = *b;
    *b = p;
}

static struct free_list *pop(struct free_list **b)
{
    struct free_list *p = *b;
    if (p)
        *b = p->cdr;
    return p;
}
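
/* Note that the free list is intrusive: push() reinterprets the freed
   block itself as a struct free_list, so the head of the dead frame
   stores the cdr pointer and the list needs no storage of its own. */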

/*************************************************************
  global allocator:
*************************************************************/
/* request slightly less than 2^K from the OS, which after malloc
   overhead and alignment should end up filling each VM page almost
   completely.  128 is a guess of the total malloc overhead and cache
   line alignment */
#define FRAME_MALLOC_CHUNK (32 * 1024 - 128)
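
/* Worked example: FRAME_MALLOC_CHUNK is 32 * 1024 - 128 == 32640 bytes,
   which carves into exactly 510 frames of the smallest (64-byte) bucket,
   or 15 frames of the largest (2048-byte) bucket with a 1920-byte
   remainder that global_alloc() records as wasted. */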

/** Implements linked list of frames */
struct pool_cons {
    char *p;               /**< This element of the list */
    struct pool_cons *cdr; /**< Remainder of the list */
};

static void extend_global_pool(global_state_t *g)
{
    /* FIXME: memalign to a cache line? */
    struct pool_cons *c = (struct pool_cons *)__cilkrts_malloc(sizeof(*c));
    g->frame_malloc.pool_begin =
        (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK);
    g->frame_malloc.pool_end =
        g->frame_malloc.pool_begin + FRAME_MALLOC_CHUNK;
    g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK;
    c->p = g->frame_malloc.pool_begin;
    c->cdr = g->frame_malloc.pool_list;
    g->frame_malloc.pool_list = c;
}

/* the size is already canonicalized at this point */
static struct free_list *global_alloc(global_state_t *g, int bucket)
{
    struct free_list *mem;
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool += size;

    if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) {
        CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end);
        if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) {
            /* We waste the fragment of pool. */
            g->frame_malloc.wasted +=
                g->frame_malloc.pool_end - g->frame_malloc.pool_begin;
            extend_global_pool(g);
        }
        mem = (struct free_list *)g->frame_malloc.pool_begin;
        g->frame_malloc.pool_begin += size;
    }

    return mem;
}

static void global_free(global_state_t *g, void *mem, int bucket)
{
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool -= size;

    push(&g->frame_malloc.global_free_list[bucket], mem);
}

void __cilkrts_frame_malloc_global_init(global_state_t *g)
{
    int i;

    __cilkrts_mutex_init(&g->frame_malloc.lock);
    g->frame_malloc.check_for_leaks = 1;
    g->frame_malloc.pool_list = 0;
    g->frame_malloc.pool_begin = 0;
    g->frame_malloc.pool_end = 0;
    g->frame_malloc.batch_size = 8000;
    g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size;
    g->frame_malloc.allocated_from_os = 0;
    g->frame_malloc.allocated_from_global_pool = 0;
    g->frame_malloc.wasted = 0;
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        g->frame_malloc.global_free_list[i] = 0;
}

// Counts how many bytes are in the global free list.
static size_t count_memory_in_global_list(global_state_t *g)
{
    // Count the memory remaining in the global free list.
    size_t size_remaining_in_global_list = 0;
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        struct free_list *p;
        size_t size_in_bucket = 0;
        p = g->frame_malloc.global_free_list[i];

        while (p) {
            size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i);
            p = p->cdr;
        }
        size_remaining_in_global_list += size_in_bucket;
    }
    return size_remaining_in_global_list;
}

void __cilkrts_frame_malloc_global_cleanup(global_state_t *g)
{
    struct pool_cons *c;

    if (g->frame_malloc.check_for_leaks) {
        size_t memory_in_global_list = count_memory_in_global_list(g);
        // TBD: This check is weak.  Short of memory corruption,
        // I don't see how we can have more memory in the free list
        // than was allocated from the OS.
        // Ideally, we should count the memory in the global free list
        // and check that we have it all.  But I believe the runtime
        // itself also uses some memory, which is not being tracked.
        if (memory_in_global_list > g->frame_malloc.allocated_from_os) {
            __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
        }
    }

    while ((c = g->frame_malloc.pool_list)) {
        g->frame_malloc.pool_list = c->cdr;
        __cilkrts_free(c->p);
        __cilkrts_free(c);
    }

    __cilkrts_mutex_destroy(0, &g->frame_malloc.lock);

    // Check that all the memory moved from the global pool into
    // workers has been returned to the global pool.
    if (g->frame_malloc.check_for_leaks
        && (g->frame_malloc.allocated_from_global_pool != 0))
    {
        __cilkrts_bug("\n"
                      "---------------------------" "\n"
                      "  MEMORY LEAK DETECTED!!!  " "\n"
                      "---------------------------" "\n"
                      "\n");
    }
}

/*************************************************************
  per-worker allocator
*************************************************************/
/* allocate a batch of frames of size SIZE from the global pool and
   store them in the worker's free list */
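/* For example: with the default batch_size of 8000 bytes (set in
   __cilkrts_frame_malloc_global_init), one batch moves 125 frames of the
   64-byte bucket (125 * 64 == 8000) but only 4 frames of the 2048-byte
   bucket (4 * 2048 == 8192 >= 8000), so the cost of taking the global
   lock is amortized over many allocations either way. */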
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size)
{
    global_state_t *g = w->g;

    __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
#if USE_MMAP
        char *p = mmap(0, 12288, PROT_READ|PROT_WRITE,
                       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            __cilkrts_bug("mmap failed %d", errno);
        assert(size < 4096);
        assert(p != MAP_FAILED);
        mprotect(p, 4096, PROT_NONE);
        mprotect(p + 8192, 4096, PROT_NONE);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size));
#else
        size_t bytes_allocated = 0;
        do {
            w->l->bucket_potential[bucket] += size;
            bytes_allocated += size;
            push(&w->l->free_list[bucket], global_alloc(g, bucket));
        } while (bytes_allocated < g->frame_malloc.batch_size);
#endif
    } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
}

static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    size_t pot = w->l->bucket_potential[bucket];
    size_t newpot;

    /* Keep up to POT/2 elements in the free list.  The cost of
       counting up to POT/2 is amortized against POT. */
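    /* Worked example: if pot is 8192 bytes in the 64-byte bucket, the
       loop below traverses 64 elements (64 * 64 == 4096 == pot/2) and
       keeps them, leaving newpot == 4096; everything past p is handed
       back to the global pool under the lock. */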
    newpot = 0;
    for (newpot = 0, p = w->l->free_list[bucket]; p && 2 * newpot < pot;
         p = p->cdr, newpot += size)
        ;
    w->l->bucket_potential[bucket] = newpot;

    if (p) {
        /* free the rest of the list.  The cost of grabbing the lock
           is amortized against POT/2; the cost of traversing the rest
           of the list is amortized against the free operation that
           puts the element on the list. */
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p->cdr)))
#if USE_MMAP
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }
}

// Free all the memory in this bucket for the specified worker,
// returning it to the global pool's free list.
static void move_bucket_to_global_free_list(__cilkrts_worker *w,
                                            int bucket)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;

    p = w->l->free_list[bucket];

    if (p) {
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p))) {
#if USE_MMAP
                size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
            }
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }

    // I'm not sure this does anything useful now, since
    // the worker is about to be destroyed.  But why not?
    w->l->bucket_potential[bucket] = 0;
}

static int bucket_of_size(size_t size)
{
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i))
            return i;

    CILK_ASSERT(0 /* can't happen */);
    return -1;
}

size_t __cilkrts_frame_malloc_roundup(size_t size)
{
    if (size > FRAME_MALLOC_MAX_SIZE) {
        /* nothing, leave it alone */
    } else {
        int bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    }
    return size;
}
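
/* For example: __cilkrts_frame_malloc_roundup(96) returns 128, the bucket
   size that would actually serve a 96-byte request, while
   __cilkrts_frame_malloc_roundup(4096) returns 4096 unchanged because it
   exceeds FRAME_MALLOC_MAX_SIZE and bypasses the buckets. */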

size_t __cilkrts_size_of_bucket(int bucket)
{
    CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
    return FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
}

void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
{
    int bucket;
    void *mem;

    /* if too large, or if no worker, fall back to __cilkrts_malloc() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
        return __cilkrts_malloc(size);
    }

    START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        while (!(mem = pop(&w->l->free_list[bucket]))) {
            /* get a batch of frames from the global pool */
            START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
                allocate_batch(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);

    return mem;
}

void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size)
{
    int bucket;
    struct free_list *p = (struct free_list *)p0;

    /* if too large, or if no worker, fall back to __cilkrts_free() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE);
        __cilkrts_free(p);
        return;
    }

#if CILK_LIB_DEBUG
    /* touch the worker structure so a stale pointer faults here */
    *(volatile long *)w;
#endif

    START_INTERVAL(w, INTERVAL_FRAME_FREE); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], p);
        if (w->l->bucket_potential[bucket] >
            w->g->frame_malloc.potential_limit) {
            START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) {
                gc_bucket(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_FREE);
}
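
/* Minimal usage sketch (hypothetical caller, for illustration):
 *
 *     void *f = __cilkrts_frame_malloc(w, 96);  // served from the
 *                                               // 128-byte bucket
 *     ...use f...
 *     __cilkrts_frame_free(w, f, 96);           // same size, so the
 *                                               // frame returns to the
 *                                               // same bucket
 *
 * The size passed to __cilkrts_frame_free() must map to the same bucket
 * (and the same side of FRAME_MALLOC_MAX_SIZE) as the size passed to
 * __cilkrts_frame_malloc(), since size alone selects the free list. */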

void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w)
{
    int i;
    local_state *l = w->l;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        l->free_list[i] = 0;
        l->bucket_potential[i] = 0;
    }
}

void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w)
{
    int i;

    // Move memory to the global pool.  This operation
    // ensures the memory does not become unreachable / leak
    // when the worker is destroyed.
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        move_bucket_to_global_free_list(w, i);
    }
}

/*
  Local Variables: **
  c-file-style:"bsd" **
  c-basic-offset:4 **
  indent-tabs-mode:nil **
  End: **
*/