/*
 * sgen-alloc.c: Object allocation routines + managed allocators
 *
 * Author:
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License 2.0 as published by the Free Software Foundation;
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License 2.0 along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * ######################################################################
 * ########  Object allocation
 * ######################################################################
 * This section of code deals with allocating memory for objects.
 * There are several ways:
 * *) allocate large objects
 * *) allocate normal objects
 * *) fast lock-free allocation
 * *) allocation of pinned objects
 */

#include "config.h"
#ifdef HAVE_SGEN_GC

#include <string.h>

#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-protocol.h"
#include "mono/sgen/sgen-memory-governor.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"

#define ALIGN_UP		SGEN_ALIGN_UP
#define ALLOC_ALIGN		SGEN_ALLOC_ALIGN
#define MAX_SMALL_OBJ_SIZE	SGEN_MAX_SMALL_OBJ_SIZE

#ifdef HEAVY_STATISTICS
static guint64 stat_objects_alloced = 0;
static guint64 stat_bytes_alloced = 0;
static guint64 stat_bytes_alloced_los = 0;
#endif

/*
 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
 * from nursery fragments.
 * tlab_next is the pointer to the space inside the TLAB where the next object will
 * be allocated.
 * tlab_temp_end is the pointer to the end of the temporary space reserved for
 * the allocation: it allows us to set the scan starts at reasonable intervals.
 * tlab_real_end points to the end of the TLAB.
 */
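
/*
 * Illustrative sketch only (see sgen_alloc_obj_nolock below for the real code):
 * in the common case an allocation is just a bump of tlab_next,
 *
 *     p = tlab_next;
 *     new_next = p + ALIGN_UP (size);
 *     if (new_next < tlab_temp_end) {
 *         tlab_next = new_next;
 *         store the vtable at p and return p;
 *     }
 *
 * Everything else (TLAB exhausted, large objects, degraded mode) is a slow path.
 */
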
/*
 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
 * variables for next+temp_end ?
 */

#ifdef HAVE_KW_THREAD
static __thread char *tlab_start;
static __thread char *tlab_next;
static __thread char *tlab_temp_end;
static __thread char *tlab_real_end;
/* Used by the managed allocator/wbarrier */
static __thread char **tlab_next_addr MONO_ATTR_USED;
#endif

#ifdef HAVE_KW_THREAD
#define TLAB_START	tlab_start
#define TLAB_NEXT	tlab_next
#define TLAB_TEMP_END	tlab_temp_end
#define TLAB_REAL_END	tlab_real_end
#else
#define TLAB_START	(__thread_info__->tlab_start)
#define TLAB_NEXT	(__thread_info__->tlab_next)
#define TLAB_TEMP_END	(__thread_info__->tlab_temp_end)
#define TLAB_REAL_END	(__thread_info__->tlab_real_end)
#endif
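
/*
 * Degraded allocation: serve the request directly from the major heap, either
 * because the nursery cannot satisfy it or because a mature object was
 * explicitly requested (for_mature).
 */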
static GCObject*
alloc_degraded (GCVTable vtable, size_t size, gboolean for_mature)
{
	GCObject *p;

	if (!for_mature) {
		sgen_client_degraded_allocation (size);
		SGEN_ATOMIC_ADD_P (degraded_mode, size);
		sgen_ensure_free_space (size);
	} else {
		if (sgen_need_major_collection (size))
			sgen_perform_collection (size, GENERATION_OLD, "mature allocation failure", !for_mature);
	}

	p = major_collector.alloc_degraded (vtable, size);

	if (!for_mature)
		binary_protocol_alloc_degraded (p, vtable, size, sgen_client_get_provenance ());

	return p;
}

static void
zero_tlab_if_necessary (void *p, size_t size)
{
	if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION || nursery_clear_policy == CLEAR_AT_TLAB_CREATION_DEBUG) {
		memset (p, 0, size);
	} else {
		/*
		 * This function is called for all allocations in
		 * TLABs.  TLABs originate from fragments, which are
		 * initialized to be faux arrays.  The remainder of
		 * the fragments are zeroed out at initialization for
		 * CLEAR_AT_GC, so here we just need to make sure that
		 * the array header is zeroed.  Since we don't know
		 * whether we're called for the start of a fragment or
		 * for somewhere in between, we zero in any case, just
		 * to make sure.
		 */
		sgen_client_zero_array_fill_header (p, size);
	}
}

/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE);
				if (!degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			/*
			 * FIXME: We might need a memory barrier here so the change to tlab_next is
			 * visible before the vtable store.
			 */

			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}

		/* Slow path */

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */

		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * We ran out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases; later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT);	// We'll never have TLABs > 2 GB
			if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting.  Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object.  The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size);
					if (!degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}
				if (!p)
					return alloc_degraded (vtable, size, FALSE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (tlab_size);
					if (!degraded_mode)
						p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, FALSE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}

	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}
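
/*
 * Lock-free variant of sgen_alloc_obj_nolock: it never collects and never
 * falls back to degraded allocation.  It returns NULL whenever the request
 * cannot be satisfied from the nursery (e.g. for objects larger than
 * SGEN_MAX_SMALL_OBJ_SIZE), leaving the caller to retry under the GC lock.
 */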
GCObject*
sgen_try_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	size = ALIGN_UP (size);
	SGEN_ASSERT (9, real_size >= SGEN_CLIENT_MINIMUM_OBJECT_SIZE, "Object too small");

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > tlab_size)) {
		/* Allocate directly from the nursery */
		p = (void **)sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/* FIXME: we should use weak memory ops here. Should help especially on x86. */
		zero_tlab_if_necessary (p, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = (int)(real_end - (char*)p);	// We'll never have TLABs > 2 GB

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = (void **)sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			zero_tlab_if_necessary (p, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = (char *)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			zero_tlab_if_necessary (new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	CANARIFY_ALLOC(p,real_size);
	SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
	g_assert (*p == NULL); /* FIXME disable this in non debug builds */

	mono_atomic_store_seq (p, vtable);

	return (GCObject*)p;
}
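
/*
 * Public allocation entry point: first try the lock-free path inside a
 * critical region; if that fails, take the GC lock and use the full
 * sgen_alloc_obj_nolock path.
 */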
GCObject*
sgen_alloc_obj (GCVTable vtable, size_t size)
{
	GCObject *res;
	TLAB_ACCESS_INIT;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				LOCK_GC;
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE);
				UNLOCK_GC;
			}
		}
	}

	ENTER_CRITICAL_REGION;
	res = sgen_try_alloc_obj_nolock (vtable, size);
	if (res) {
		EXIT_CRITICAL_REGION;
		return res;
	}
	EXIT_CRITICAL_REGION;

	LOCK_GC;
	res = sgen_alloc_obj_nolock (vtable, size);
	UNLOCK_GC;
	return res;
}

/*
 * To be used for interned strings and possibly MonoThread, reflection handles.
 * We may want to explicitly free these objects.
 */
GCObject*
sgen_alloc_obj_pinned (GCVTable vtable, size_t size)
{
	GCObject *p;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;

	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		/* large objects are always pinned anyway */
		p = (GCObject *)sgen_los_alloc_large_inner (vtable, size);
	} else {
		SGEN_ASSERT (9, sgen_client_vtable_is_inited (vtable), "class %s:%s is not initialized", sgen_client_vtable_get_namespace (vtable), sgen_client_vtable_get_name (vtable));
		p = major_collector.alloc_small_pinned_obj (vtable, size, SGEN_VTABLE_HAS_REFERENCES (vtable));
	}
	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated pinned object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc_pinned (p, vtable, size, sgen_client_get_provenance ());
	}
	UNLOCK_GC;
	return p;
}
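
/*
 * Allocate an object directly in the old generation, bypassing the nursery,
 * by going through the degraded-allocation path with for_mature set.
 */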
GCObject*
sgen_alloc_obj_mature (GCVTable vtable, size_t size)
{
	GCObject *res;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;
	res = alloc_degraded (vtable, size, TRUE);
	UNLOCK_GC;

	return res;
}
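
/*
 * Point the thread's tlab_*_addr fields at its TLAB variables (thread-local
 * statics with HAVE_KW_THREAD, fields of SgenThreadInfo otherwise) so they
 * can be updated indirectly, e.g. by sgen_clear_tlabs ().
 */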
void
sgen_init_tlab_info (SgenThreadInfo* info)
{
#ifndef HAVE_KW_THREAD
	SgenThreadInfo *__thread_info__ = info;
#endif

	info->tlab_start_addr = &TLAB_START;
	info->tlab_next_addr = &TLAB_NEXT;
	info->tlab_temp_end_addr = &TLAB_TEMP_END;
	info->tlab_real_end_addr = &TLAB_REAL_END;

#ifdef HAVE_KW_THREAD
	tlab_next_addr = &tlab_next;
#endif
}

/*
 * Clear the thread local TLAB variables for all threads.
 */
void
sgen_clear_tlabs (void)
{
	SgenThreadInfo *info;

	FOREACH_THREAD (info) {
		/* A new TLAB will be allocated when the thread does its first allocation */
		*info->tlab_start_addr = NULL;
		*info->tlab_next_addr = NULL;
		*info->tlab_temp_end_addr = NULL;
		*info->tlab_real_end_addr = NULL;
	} END_FOREACH_THREAD
}
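
/*
 * One-time allocator setup: publish the TLS offsets of the TLAB variables
 * (needed by the managed allocator) and register the heavy-statistics counters.
 */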
void
sgen_init_allocator (void)
{
#if defined(HAVE_KW_THREAD) && !defined(SGEN_WITHOUT_MONO)
	int tlab_next_addr_offset = -1;
	int tlab_temp_end_offset = -1;

	MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
	MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);

	mono_tls_key_set_offset (TLS_KEY_SGEN_TLAB_NEXT_ADDR, tlab_next_addr_offset);
	mono_tls_key_set_offset (TLS_KEY_SGEN_TLAB_TEMP_END, tlab_temp_end_offset);
#endif

#ifdef HEAVY_STATISTICS
	mono_counters_register ("# objects allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_alloced);
	mono_counters_register ("bytes allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced);
	mono_counters_register ("bytes allocated in LOS", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced_los);
#endif
}

#endif /*HAVE_SGEN_GC*/