1 /* frame_malloc.c -*-C-*-
3 *************************************************************************
5 * Copyright (C) 2009-2016, Intel Corporation
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
29 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
32 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
35 * *********************************************************************
37 * PLEASE NOTE: This file is a downstream copy of a file maintained in
38 * a repository at cilkplus.org. Changes made to this file that are not
39 * submitted through the contribution process detailed at
40 * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
41 * time that a new version is released. Changes only submitted to the
42 * GNU compiler collection or posted to the git repository at
43 * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
46 * We welcome your contributions to this open source project. Thank you
47 * for your assistance in helping us improve Cilk Plus.
48 **************************************************************************/
50 #include "frame_malloc.h"
52 #include "local_state.h"
53 #include "cilk_malloc.h"
59 /* #define USE_MMAP 1 */
66 // Define to fill the stack frame header with the fill character when pushing
67 // it on a free list. Note that this should be #ifdef'd out when checked in!
70 #define HEADER_FILL_CHAR 0xbf
73 // HEADER_FILL_CHAR should not be defined when checked in, so put out a warning
74 // message if this is a release build
76 #if defined(NDEBUG) && defined (HEADER_FILL_CHAR)
77 #pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
80 static void allocate_batch(__cilkrts_worker
*w
, int bucket
, size_t size
);
84 const unsigned short __cilkrts_bucket_sizes
[FRAME_MALLOC_NBUCKETS
] =
86 64, 128, 256, 512, 1024, 2048
89 #define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]
91 /* threshold above which we use slow malloc */
92 #define FRAME_MALLOC_MAX_SIZE 2048
96 /* Note that this must match the implementation of framesz_to_bucket in
97 * asmilator/layout.ml! */
98 #define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))
100 /* threshold above which we use slow malloc */
101 #define FRAME_MALLOC_MAX_SIZE \
102 FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)
106 /* utility procedures */
107 static void push(struct free_list
**b
, struct free_list
*p
)
109 #ifdef HEADER_FILL_CHAR
110 memset (p
, HEADER_FILL_CHAR
, FRAME_MALLOC_BUCKET_TO_SIZE(0));
112 /* cons! onto free list */
117 static struct free_list
*pop(struct free_list
**b
)
119 struct free_list
*p
= *b
;
125 /*************************************************************
127 *************************************************************/
/* request slightly less than 2^K from the OS, which after malloc
   overhead and alignment should end up filling each VM page almost
   completely.  128 is a guess of the total malloc overhead and cache
   line alignment of the returned block. */
132 #define FRAME_MALLOC_CHUNK (32 * 1024 - 128)
134 /** Implements linked list of frames */
136 char *p
; /**< This element of the list */
137 struct pool_cons
*cdr
; /**< Remainder of the list */
140 static void extend_global_pool(global_state_t
*g
)
142 /* FIXME: memalign to a cache line? */
143 struct pool_cons
*c
= (struct pool_cons
*)__cilkrts_malloc(sizeof(*c
));
144 g
->frame_malloc
.pool_begin
=
145 (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK
);
146 g
->frame_malloc
.pool_end
=
147 g
->frame_malloc
.pool_begin
+ FRAME_MALLOC_CHUNK
;
148 g
->frame_malloc
.allocated_from_os
+= FRAME_MALLOC_CHUNK
;
149 c
->p
= g
->frame_malloc
.pool_begin
;
150 c
->cdr
= g
->frame_malloc
.pool_list
;
151 g
->frame_malloc
.pool_list
= c
;
154 /* the size is already canonicalized at this point */
155 static struct free_list
*global_alloc(global_state_t
*g
, int bucket
)
157 struct free_list
*mem
;
160 CILK_ASSERT(bucket
< FRAME_MALLOC_NBUCKETS
);
161 size
= FRAME_MALLOC_BUCKET_TO_SIZE(bucket
);
162 g
->frame_malloc
.allocated_from_global_pool
+= size
;
164 if (!(mem
= pop(&g
->frame_malloc
.global_free_list
[bucket
]))) {
166 CILK_ASSERT(g
->frame_malloc
.pool_begin
<= g
->frame_malloc
.pool_end
);
167 if (g
->frame_malloc
.pool_begin
+ size
> g
->frame_malloc
.pool_end
) {
168 /* We waste the fragment of pool. */
169 g
->frame_malloc
.wasted
+=
170 g
->frame_malloc
.pool_end
- g
->frame_malloc
.pool_begin
;
171 extend_global_pool(g
);
173 mem
= (struct free_list
*)g
->frame_malloc
.pool_begin
;
174 g
->frame_malloc
.pool_begin
+= size
;
180 static void global_free(global_state_t
*g
, void *mem
, int bucket
)
184 CILK_ASSERT(bucket
< FRAME_MALLOC_NBUCKETS
);
185 size
= FRAME_MALLOC_BUCKET_TO_SIZE(bucket
);
186 g
->frame_malloc
.allocated_from_global_pool
-= size
;
188 push(&g
->frame_malloc
.global_free_list
[bucket
], mem
);
191 void __cilkrts_frame_malloc_global_init(global_state_t
*g
)
195 __cilkrts_mutex_init(&g
->frame_malloc
.lock
);
196 g
->frame_malloc
.check_for_leaks
= 1;
197 g
->frame_malloc
.pool_list
= 0;
198 g
->frame_malloc
.pool_begin
= 0;
199 g
->frame_malloc
.pool_end
= 0;
200 g
->frame_malloc
.batch_size
= 8000;
201 g
->frame_malloc
.potential_limit
= 4 * g
->frame_malloc
.batch_size
;
202 g
->frame_malloc
.allocated_from_os
= 0;
203 g
->frame_malloc
.allocated_from_global_pool
= 0;
204 g
->frame_malloc
.wasted
= 0;
205 for (i
= 0; i
< FRAME_MALLOC_NBUCKETS
; ++i
)
206 g
->frame_malloc
.global_free_list
[i
] = 0;
209 // Counts how many bytes are in the global free list.
210 static size_t count_memory_in_global_list(global_state_t
*g
)
213 // Count the memory remaining in the global free list.
214 size_t size_remaining_in_global_list
= 0;
216 for (i
= 0; i
< FRAME_MALLOC_NBUCKETS
; ++i
) {
218 size_t size_in_bucket
= 0;
219 p
= g
->frame_malloc
.global_free_list
[i
];
222 size_in_bucket
+= FRAME_MALLOC_BUCKET_TO_SIZE(i
);
225 size_remaining_in_global_list
+= size_in_bucket
;
227 return size_remaining_in_global_list
;
231 void __cilkrts_frame_malloc_global_cleanup(global_state_t
*g
)
235 if (g
->frame_malloc
.check_for_leaks
) {
236 size_t memory_in_global_list
= count_memory_in_global_list(g
);
237 // TBD: This check is weak. Short of memory corruption,
238 // I don't see how we have more memory in the free list
239 // than allocated from the os.
240 // Ideally, we should count the memory in the global free list
241 // and check that we have it all. But I believe the runtime
242 // itself also uses some memory, which is not being tracked.
243 if (memory_in_global_list
> g
->frame_malloc
.allocated_from_os
) {
244 __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
248 while ((c
= g
->frame_malloc
.pool_list
)) {
249 g
->frame_malloc
.pool_list
= c
->cdr
;
250 __cilkrts_free(c
->p
);
254 __cilkrts_mutex_destroy(0, &g
->frame_malloc
.lock
);
256 // Check that all the memory moved from the global pool into
257 // workers has been returned to the global pool.
258 if (g
->frame_malloc
.check_for_leaks
259 && (g
->frame_malloc
.allocated_from_global_pool
!= 0))
262 "---------------------------" "\n"
263 " MEMORY LEAK DETECTED!!! " "\n"
264 "---------------------------" "\n"
270 /*************************************************************
272 *************************************************************/
273 /* allocate a batch of frames of size SIZE from the global pool and
274 store them in the worker's free list */
275 static void allocate_batch(__cilkrts_worker
*w
, int bucket
, size_t size
)
277 global_state_t
*g
= w
->g
;
279 __cilkrts_mutex_lock(w
, &g
->frame_malloc
.lock
); {
281 char *p
= mmap(0, 12288, PROT_READ
|PROT_WRITE
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1, 0);
283 __cilkrts_bug("mmap failed %d", errno
);
285 assert(p
!= MAP_FAILED
);
286 mprotect(p
, 4096, PROT_NONE
);
287 mprotect(p
+ 8192, 4096, PROT_NONE
);
288 w
->l
->bucket_potential
[bucket
] += size
;
289 push(&w
->l
->free_list
[bucket
], (struct free_list
*)(p
+ 8192 - size
));
291 size_t bytes_allocated
= 0;
293 w
->l
->bucket_potential
[bucket
] += size
;
294 bytes_allocated
+= size
;
295 push(&w
->l
->free_list
[bucket
], global_alloc(g
, bucket
));
296 } while (bytes_allocated
< g
->frame_malloc
.batch_size
);
298 } __cilkrts_mutex_unlock(w
, &g
->frame_malloc
.lock
);
302 static void gc_bucket(__cilkrts_worker
*w
, int bucket
, size_t size
)
304 struct free_list
*p
, *q
;
305 global_state_t
*g
= w
->g
;
306 size_t pot
= w
->l
->bucket_potential
[bucket
];
309 /* Keep up to POT/2 elements in the free list. The cost of
310 counting up to POT/2 is amortized against POT. */
312 for (newpot
= 0, p
= w
->l
->free_list
[bucket
]; p
&& 2 * newpot
< pot
;
313 p
= p
->cdr
, newpot
+= size
)
315 w
->l
->bucket_potential
[bucket
] = newpot
;
318 /* free the rest of the list. The cost of grabbing the lock
319 is amortized against POT/2; the cost of traversing the rest
320 of the list is amortized against the free operation that
321 puts the element on the list. */
322 __cilkrts_mutex_lock(w
, &g
->frame_malloc
.lock
); {
323 while ((q
= pop(&p
->cdr
)))
325 munmap((char *)q
+ size
- 8192, 12288);
327 global_free(g
, q
, bucket
);
329 } __cilkrts_mutex_unlock(w
, &g
->frame_malloc
.lock
);
333 // Free all the memory in this bucket for the specified worker,
334 // returning it to the global pool's free list.
335 static void move_bucket_to_global_free_list(__cilkrts_worker
*w
,
338 struct free_list
*p
, *q
;
339 global_state_t
*g
= w
->g
;
340 p
= w
->l
->free_list
[bucket
];
343 __cilkrts_mutex_lock(w
, &g
->frame_malloc
.lock
); {
344 while ((q
= pop(&p
))) {
346 size_t size
= FRAME_MALLOC_BUCKET_TO_SIZE(bucket
);
347 munmap((char *)q
+ size
- 8192, 12288);
349 global_free(g
, q
, bucket
);
352 } __cilkrts_mutex_unlock(w
, &g
->frame_malloc
.lock
);
355 // I'm not sure this does anything useful now, since
356 // the worker is about to be destroyed. But why not?
357 w
->l
->bucket_potential
[bucket
] = 0;
360 static int bucket_of_size(size_t size
)
364 for (i
= 0; i
< FRAME_MALLOC_NBUCKETS
; ++i
)
365 if (size
<= FRAME_MALLOC_BUCKET_TO_SIZE(i
))
368 CILK_ASSERT(0 /* can't happen */);
372 size_t __cilkrts_frame_malloc_roundup(size_t size
)
374 if (size
> FRAME_MALLOC_MAX_SIZE
) {
375 /* nothing, leave it alone */
377 int bucket
= bucket_of_size(size
);
378 size
= FRAME_MALLOC_BUCKET_TO_SIZE(bucket
);
383 size_t __cilkrts_size_of_bucket(int bucket
)
385 CILK_ASSERT(bucket
>= 0 && bucket
< FRAME_MALLOC_NBUCKETS
);
386 return FRAME_MALLOC_BUCKET_TO_SIZE(bucket
);
389 void *__cilkrts_frame_malloc(__cilkrts_worker
*w
, size_t size
)
394 /* if too large, or if no worker, fall back to __cilkrts_malloc() */
395 if (!w
|| size
> FRAME_MALLOC_MAX_SIZE
) {
396 NOTE_INTERVAL(w
, INTERVAL_FRAME_ALLOC_LARGE
);
397 return __cilkrts_malloc(size
);
400 START_INTERVAL(w
, INTERVAL_FRAME_ALLOC
); {
401 bucket
= bucket_of_size(size
);
402 size
= FRAME_MALLOC_BUCKET_TO_SIZE(bucket
);
404 while (!(mem
= pop(&w
->l
->free_list
[bucket
]))) {
405 /* get a batch of frames from the global pool */
406 START_INTERVAL(w
, INTERVAL_FRAME_ALLOC_GLOBAL
) {
407 allocate_batch(w
, bucket
, size
);
408 } STOP_INTERVAL(w
, INTERVAL_FRAME_ALLOC_GLOBAL
);
410 } STOP_INTERVAL(w
, INTERVAL_FRAME_ALLOC
);
415 void __cilkrts_frame_free(__cilkrts_worker
*w
, void *p0
, size_t size
)
418 struct free_list
*p
= (struct free_list
*)p0
;
420 /* if too large, or if no worker, fall back to __cilkrts_free() */
421 if (!w
|| size
> FRAME_MALLOC_MAX_SIZE
) {
422 NOTE_INTERVAL(w
, INTERVAL_FRAME_FREE_LARGE
);
431 START_INTERVAL(w
, INTERVAL_FRAME_FREE
); {
432 bucket
= bucket_of_size(size
);
433 size
= FRAME_MALLOC_BUCKET_TO_SIZE(bucket
);
434 w
->l
->bucket_potential
[bucket
] += size
;
435 push(&w
->l
->free_list
[bucket
], p
);
436 if (w
->l
->bucket_potential
[bucket
] >
437 w
->g
->frame_malloc
.potential_limit
) {
438 START_INTERVAL(w
, INTERVAL_FRAME_FREE_GLOBAL
) {
439 gc_bucket(w
, bucket
, size
);
440 } STOP_INTERVAL(w
, INTERVAL_FRAME_FREE_GLOBAL
);
442 } STOP_INTERVAL(w
, INTERVAL_FRAME_FREE
);
445 void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker
*w
)
448 local_state
*l
= w
->l
;
450 for (i
= 0; i
< FRAME_MALLOC_NBUCKETS
; ++i
) {
452 l
->bucket_potential
[i
] = 0;
456 void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker
*w
)
459 // Move memory to the global pool. This operation
460 // ensures the memory does not become unreachable / leak
461 // when the worker is destroyed.
462 for (i
= 0; i
< FRAME_MALLOC_NBUCKETS
; ++i
) {
463 move_bucket_to_global_free_list(w
, i
);
469 c-file-style:"bsd" **
471 indent-tabs-mode:nil **