/* frame_malloc.c                  -*-C-*-
 *
 *************************************************************************
 *
 *  Copyright (C) 2009-2016, Intel Corporation
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 *
 *  *********************************************************************
 *
 *  PLEASE NOTE: This file is a downstream copy of a file maintained in
 *  a repository at cilkplus.org.  Changes made to this file that are not
 *  submitted through the contribution process detailed at
 *  http://www.cilkplus.org/submit-cilk-contribution will be lost the next
 *  time that a new version is released.  Changes only submitted to the
 *  GNU compiler collection or posted to the git repository at
 *  https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
 *  not tracked.
 *
 *  We welcome your contributions to this open source project.  Thank you
 *  for your assistance in helping us improve Cilk Plus.
 **************************************************************************/
#include "frame_malloc.h"
#include "bug.h"
#include "local_state.h"
#include "cilk_malloc.h"

#ifndef __VXWORKS__
#include <memory.h>
#endif

/* #define USE_MMAP 1 */
#if USE_MMAP
#define __USE_MISC 1
#include <sys/mman.h>
#include <errno.h>
#endif
// Define to fill the stack frame header with the fill character when pushing
// it on a free list.  Note that this should be #ifdef'd out when checked in!

#ifdef _DEBUG
#define HEADER_FILL_CHAR 0xbf
#endif

// HEADER_FILL_CHAR should not be defined when checked in, so put out a warning
// message if this is a release build

#if defined(NDEBUG) && defined (HEADER_FILL_CHAR)
#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
#endif
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size);

#ifndef _WIN32

const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] =
{
    64, 128, 256, 512, 1024, 2048
};

#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE 2048

#else // _WIN32

/* Note that this must match the implementation of framesz_to_bucket in
 * asmilator/layout.ml! */
#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE \
    FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)

#endif // _WIN32
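
/* For illustration: with FRAME_MALLOC_NBUCKETS == 6 (the table above has
 * six entries), both branches yield the same geometric series of bucket
 * sizes, since 64 << bucket gives 64, 128, 256, 512, 1024, 2048.  A
 * request is rounded up to the smallest bucket that holds it, e.g. a
 * 100-byte frame lands in the 128-byte bucket, and anything larger than
 * FRAME_MALLOC_MAX_SIZE bypasses the buckets and falls back to
 * __cilkrts_malloc()/__cilkrts_free(). */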
/* utility procedures */
static void push(struct free_list **b, struct free_list *p)
{
#ifdef HEADER_FILL_CHAR
    memset (p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0));
#endif
    /* cons! onto free list */
    p->cdr = *b;
    *b = p;
}

static struct free_list *pop(struct free_list **b)
{
    struct free_list *p = *b;
    if (p)
        *b = p->cdr;
    return p;
}
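
/* Example of the LIFO discipline above, starting from an empty bucket
 * b == NULL (A and B are hypothetical frames):
 *
 *     push(&b, A);   // b: A -> NULL
 *     push(&b, B);   // b: B -> A -> NULL
 *     pop(&b);       // returns B; b: A -> NULL
 *
 * The list is intrusive: the cdr link is stored in the freed frame
 * itself, so the free list costs no memory beyond the frames. */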
/*************************************************************
  global allocator:
 *************************************************************/
/* request slightly less than 2^K from the OS, which after malloc
   overhead and alignment should end up filling each VM page almost
   completely.  128 is a guess of the total malloc overhead and cache
   line alignment */
#define FRAME_MALLOC_CHUNK (32 * 1024 - 128)
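
/* Spelling out the guess above: 32 * 1024 - 128 = 32640 bytes, so one
 * chunk plus roughly 128 bytes of malloc bookkeeping and alignment
 * padding should stay within 32KB, i.e. eight 4KB VM pages with almost
 * no tail waste. */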
/** Implements linked list of frames */
struct pool_cons {
    char *p;               /**< This element of the list */
    struct pool_cons *cdr; /**< Remainder of the list */
};
static void extend_global_pool(global_state_t *g)
{
    /* FIXME: memalign to a cache line? */
    struct pool_cons *c = (struct pool_cons *)__cilkrts_malloc(sizeof(*c));
    g->frame_malloc.pool_begin =
        (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK);
    g->frame_malloc.pool_end =
        g->frame_malloc.pool_begin + FRAME_MALLOC_CHUNK;
    g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK;
    c->p = g->frame_malloc.pool_begin;
    c->cdr = g->frame_malloc.pool_list;
    g->frame_malloc.pool_list = c;
}
/* the size is already canonicalized at this point */
static struct free_list *global_alloc(global_state_t *g, int bucket)
{
    struct free_list *mem;
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool += size;

    if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) {
        CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end);
        if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) {
            /* We waste the fragment of pool. */
            g->frame_malloc.wasted +=
                g->frame_malloc.pool_end - g->frame_malloc.pool_begin;
            extend_global_pool(g);
        }
        mem = (struct free_list *)g->frame_malloc.pool_begin;
        g->frame_malloc.pool_begin += size;
    }

    return mem;
}
static void global_free(global_state_t *g, void *mem, int bucket)
{
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool -= size;

    push(&g->frame_malloc.global_free_list[bucket], mem);
}
void __cilkrts_frame_malloc_global_init(global_state_t *g)
{
    int i;

    __cilkrts_mutex_init(&g->frame_malloc.lock);
    g->frame_malloc.check_for_leaks = 1;
    g->frame_malloc.pool_list = 0;
    g->frame_malloc.pool_begin = 0;
    g->frame_malloc.pool_end = 0;
    g->frame_malloc.batch_size = 8000;
    g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size;
    g->frame_malloc.allocated_from_os = 0;
    g->frame_malloc.allocated_from_global_pool = 0;
    g->frame_malloc.wasted = 0;
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        g->frame_malloc.global_free_list[i] = 0;
}
// Counts how many bytes are in the global free list.
static size_t count_memory_in_global_list(global_state_t *g)
{
    // Count the memory remaining in the global free list.
    size_t size_remaining_in_global_list = 0;
    int i;
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        struct free_list *p;
        size_t size_in_bucket = 0;
        p = g->frame_malloc.global_free_list[i];

        while (p) {
            size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i);
            p = p->cdr;
        }
        size_remaining_in_global_list += size_in_bucket;
    }
    return size_remaining_in_global_list;
}
void __cilkrts_frame_malloc_global_cleanup(global_state_t *g)
{
    struct pool_cons *c;

    if (g->frame_malloc.check_for_leaks) {
        size_t memory_in_global_list = count_memory_in_global_list(g);
        // TBD: This check is weak.  Short of memory corruption,
        // I don't see how we have more memory in the free list
        // than allocated from the os.
        // Ideally, we should count the memory in the global free list
        // and check that we have it all.  But I believe the runtime
        // itself also uses some memory, which is not being tracked.
        if (memory_in_global_list > g->frame_malloc.allocated_from_os) {
            __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
        }
    }

    while ((c = g->frame_malloc.pool_list)) {
        g->frame_malloc.pool_list = c->cdr;
        __cilkrts_free(c->p);
        __cilkrts_free(c);
    }

    __cilkrts_mutex_destroy(0, &g->frame_malloc.lock);

    // Check that all the memory moved from the global pool into
    // workers has been returned to the global pool.
    if (g->frame_malloc.check_for_leaks
        && (g->frame_malloc.allocated_from_global_pool != 0))
    {
        __cilkrts_bug("\n"
                      "---------------------------" "\n"
                      "  MEMORY LEAK DETECTED!!!  " "\n"
                      "---------------------------" "\n"
                      "\n");
    }
}
/*************************************************************
  per-worker allocator
 *************************************************************/
/* allocate a batch of frames of size SIZE from the global pool and
   store them in the worker's free list */
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size)
{
    global_state_t *g = w->g;

    __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
#if USE_MMAP
        char *p = mmap(0, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            __cilkrts_bug("mmap failed %d", errno);
        assert(size < 4096);
        assert(p != MAP_FAILED);
        mprotect(p, 4096, PROT_NONE);
        mprotect(p + 8192, 4096, PROT_NONE);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size));
#else
        size_t bytes_allocated = 0;
        do {
            w->l->bucket_potential[bucket] += size;
            bytes_allocated += size;
            push(&w->l->free_list[bucket], global_alloc(g, bucket));
        } while (bytes_allocated < g->frame_malloc.batch_size);
#endif
    } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
}
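
/* A note on the magic numbers in the USE_MMAP path above: 12288 bytes is
 * three 4KB pages.  The first and last pages are made PROT_NONE and the
 * single frame is placed flush against the end of the middle page
 * (p + 8192 - size), so an underrun or overrun of the frame faults
 * immediately.  This layout is why the munmap() calls below recover the
 * mapping base as (char *)q + size - 8192. */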
static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    size_t pot = w->l->bucket_potential[bucket];
    size_t newpot;

    /* Keep up to POT/2 elements in the free list.  The cost of
       counting up to POT/2 is amortized against POT. */
    newpot = 0;
    for (newpot = 0, p = w->l->free_list[bucket]; p && 2 * newpot < pot;
         p = p->cdr, newpot += size)
        ;
    w->l->bucket_potential[bucket] = newpot;

    if (p) {
        /* free the rest of the list.  The cost of grabbing the lock
           is amortized against POT/2; the cost of traversing the rest
           of the list is amortized against the free operation that
           puts the element on the list. */
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p->cdr)))
#if USE_MMAP
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }
}
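
/* A worked example of the potential scheme above, with the default
 * potential_limit of 4 * 8000 = 32000 bytes: once a 64-byte bucket's
 * potential exceeds that limit, gc_bucket() walks the free list while
 * 2 * newpot < pot, keeping roughly pot/2 = 16000 bytes (about 250
 * frames) local and returning the remainder of the list to the global
 * pool under the lock. */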
// Free all the memory in this bucket for the specified worker,
// returning it to the global pool's free list.
static void move_bucket_to_global_free_list(__cilkrts_worker *w,
                                            int bucket)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    p = w->l->free_list[bucket];

    if (p) {
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p))) {
#if USE_MMAP
                size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
            }
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }

    // I'm not sure this does anything useful now, since
    // the worker is about to be destroyed.  But why not?
    w->l->bucket_potential[bucket] = 0;
}
static int bucket_of_size(size_t size)
{
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i))
            return i;

    CILK_ASSERT(0 /* can't happen */);
    return -1;
}
size_t __cilkrts_frame_malloc_roundup(size_t size)
{
    if (size > FRAME_MALLOC_MAX_SIZE) {
        /* nothing, leave it alone */
    } else {
        int bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    }
    return size;
}
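
/* For example, __cilkrts_frame_malloc_roundup(100) returns 128 (the
 * smallest bucket that fits), while __cilkrts_frame_malloc_roundup(4096)
 * returns 4096 unchanged because it exceeds FRAME_MALLOC_MAX_SIZE and
 * will be served by __cilkrts_malloc() instead of a bucket. */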
size_t __cilkrts_size_of_bucket(int bucket)
{
    CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
    return FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
}
void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
{
    int bucket;
    void *mem;

    /* if too large, or if no worker, fall back to __cilkrts_malloc() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
        return __cilkrts_malloc(size);
    }

    START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        while (!(mem = pop(&w->l->free_list[bucket]))) {
            /* get a batch of frames from the global pool */
            START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
                allocate_batch(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);

    return mem;
}
void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size)
{
    int bucket;
    struct free_list *p = (struct free_list *)p0;

    /* if too large, or if no worker, fall back to __cilkrts_free() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE);
        __cilkrts_free(p);
        return;
    }

#if CILK_LIB_DEBUG
    *(volatile long *)w;
#endif

    START_INTERVAL(w, INTERVAL_FRAME_FREE); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], p);
        if (w->l->bucket_potential[bucket] >
            w->g->frame_malloc.potential_limit) {
            START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) {
                gc_bucket(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_FREE);
}
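
/* A sketch of the intended pairing ('struct my_frame' is hypothetical;
 * the size passed to free must equal the size passed to malloc so both
 * map to the same bucket):
 *
 *     void *f = __cilkrts_frame_malloc(w, sizeof(struct my_frame));
 *     ...
 *     __cilkrts_frame_free(w, f, sizeof(struct my_frame));
 *
 * Freeing with a size that maps to a different bucket would put the
 * block on the wrong free list and skew the bucket accounting. */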
void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w)
{
    int i;
    local_state *l = w->l;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        l->free_list[i] = 0;
        l->bucket_potential[i] = 0;
    }
}
void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w)
{
    int i;
    // Move memory to the global pool.  This operation
    // ensures the memory does not become unreachable / leak
    // when the worker is destroyed.
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        move_bucket_to_global_free_list(w, i);
    }
}
/*
  Local Variables: **
  c-file-style:"bsd" **
  c-basic-offset:4 **
  indent-tabs-mode:nil **
  End: **
*/