/*
 * Object allocation routines + managed allocators
 *
 * Authors:
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
/*
 * ######################################################################
 * ######## Object allocation
 * ######################################################################
 * This section of code deals with allocating memory for objects.
 * There are several ways:
 * *) allocate large objects
 * *) allocate normal objects
 * *) fast lock-free allocation
 * *) allocation of pinned objects
 */
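/*
 * Roughly, those paths map to the entry points below:
 *   large objects        -> sgen_los_alloc_large_inner (used for sizes above SGEN_MAX_SMALL_OBJ_SIZE)
 *   normal objects       -> sgen_alloc_obj / sgen_alloc_obj_nolock
 *   fast lock-free path  -> sgen_try_alloc_obj_nolock (used inside a critical region by sgen_alloc_obj)
 *   pinned objects       -> sgen_alloc_obj_pinned
 */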
#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-protocol.h"
#include "mono/sgen/sgen-memory-governor.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"

#define ALIGN_UP		SGEN_ALIGN_UP
#define ALLOC_ALIGN		SGEN_ALLOC_ALIGN
#define MAX_SMALL_OBJ_SIZE	SGEN_MAX_SMALL_OBJ_SIZE
#ifdef HEAVY_STATISTICS
static guint64 stat_objects_alloced = 0;
static guint64 stat_bytes_alloced = 0;
static guint64 stat_bytes_alloced_los = 0;
#endif
/*
 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
 * from nursery fragments.
 * tlab_next is the pointer to the space inside the TLAB where the next object will
 * be allocated.
 * tlab_temp_end is the pointer to the end of the temporary space reserved for
 * the allocation: it allows us to set the scan starts at reasonable intervals.
 * tlab_real_end points to the end of the TLAB.
 */
#define TLAB_START	(__thread_info__->tlab_start)
#define TLAB_NEXT	(__thread_info__->tlab_next)
#define TLAB_TEMP_END	(__thread_info__->tlab_temp_end)
#define TLAB_REAL_END	(__thread_info__->tlab_real_end)
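/*
 * Illustrative sketch only (not part of the allocator): the fast path built on
 * the fields above is a plain bump-pointer allocation over the TLAB.  The
 * struct and function below are hypothetical simplifications of the thread-info
 * fields accessed through the TLAB_* macros.
 */
typedef struct {
	char *start;	/* TLAB_START: beginning of the buffer */
	char *next;	/* TLAB_NEXT: where the next object would go */
	char *temp_end;	/* TLAB_TEMP_END: end of the current scan-start window */
	char *real_end;	/* TLAB_REAL_END: end of the buffer */
} ExampleTlab;

static void*
example_tlab_bump_alloc (ExampleTlab *tlab, size_t aligned_size)
{
	char *p = tlab->next;
	char *new_next = p + aligned_size;
	if (new_next <= tlab->real_end) {
		/* Fits: publish the new bump pointer and hand out the old one. */
		tlab->next = new_next;
		return p;
	}
	/* Does not fit: the caller must refill the TLAB or allocate elsewhere. */
	return NULL;
}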
static void
increment_thread_allocation_counter (size_t byte_size)
{
	mono_thread_info_current ()->total_bytes_allocated += byte_size;
}
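/*
 * Degraded allocation: serve the request directly from the major heap when the
 * nursery cannot (or should not) be used, making sure enough old-generation
 * space is available and possibly triggering a major collection first.
 */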
alloc_degraded (GCVTable vtable, size_t size, gboolean for_mature)

	increment_thread_allocation_counter (size);

	sgen_client_degraded_allocation ();
	SGEN_ATOMIC_ADD_P (sgen_degraded_mode, size);
	sgen_ensure_free_space (size, GENERATION_OLD);

	if (sgen_need_major_collection (size, &forced))
		sgen_perform_collection (size, GENERATION_OLD, "mature allocation failure", !for_mature || forced, TRUE);

	p = sgen_major_collector.alloc_degraded (vtable, size);

	sgen_binary_protocol_alloc_degraded (p, vtable, size, sgen_client_get_provenance ());
static void
zero_tlab_if_necessary (void *p, size_t size)
{
	if (sgen_nursery_clear_policy == CLEAR_AT_TLAB_CREATION || sgen_nursery_clear_policy == CLEAR_AT_TLAB_CREATION_DEBUG) {
		memset (p, 0, size);
	} else {
		/*
		 * This function is called for all allocations in
		 * TLABs. TLABs originate from fragments, which are
		 * initialized to be faux arrays. The remainder of
		 * the fragments are zeroed out at initialization for
		 * CLEAR_AT_GC, so here we just need to make sure that
		 * the array header is zeroed. Since we don't know
		 * whether we're called for the start of a fragment or
		 * for somewhere in between, we zero in any case, just
		 * to make sure.
		 */
		sgen_client_zero_array_fill_header (p, size);
	}
}
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
	/* FIXME: handle OOM */
	size_t real_size = size;

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");
	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				if (!sgen_degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					g_assert_not_reached ();
				}
			}
		} else if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));

		increment_thread_allocation_counter (size);
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return p;
		}
		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. We keep the TLAB for future allocations if the remaining
			 * space is above a threshold, and satisfy the allocation directly
			 * from the nursery. Otherwise, we attempt to get a new TLAB from the
			 * nursery and allocate into it.
			 */

			/* when running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (sgen_degraded_mode && sgen_degraded_mode < sgen_nursery_size)
				return alloc_degraded (vtable, size, FALSE);
			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT);	// We'll never have TLABs > 2 GB
			if (size > sgen_tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting. Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object. The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}

				increment_thread_allocation_counter (size);

				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;

				SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (sgen_tlab_size, GENERATION_NURSERY);
					if (!sgen_degraded_mode)
						p = (void **)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				increment_thread_allocation_counter (TLAB_NEXT - TLAB_START);
				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}

		CANARIFY_ALLOC(p,real_size);
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return p;
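/*
 * Illustrative sketch only (not part of SGen): how the scan starts recorded via
 * sgen_set_nursery_scan_start above are meant to be used.  When a potentially
 * pinned pointer is found on a thread stack, the search for the enclosing object
 * can begin at the nearest recorded scan start instead of at the start of the
 * nursery.  All names below are hypothetical; the real pinning code lives
 * elsewhere in SGen.
 */
static char*
example_find_object_from_scan_start (char *scan_start, char *candidate, size_t (*object_size) (char *obj))
{
	/* Assumes candidate points into an object laid out at or after scan_start. */
	char *obj = scan_start;
	for (;;) {
		size_t size = object_size (obj);
		if (candidate < obj + size)
			return obj;	/* candidate falls inside this object */
		obj += size;
	}
}

/*
 * Lock-free attempt used from the critical-region fast path in sgen_alloc_obj
 * below: it only handles small objects and requests that can be satisfied from
 * the nursery without collecting; when it cannot, it gives up (returns NULL)
 * and the caller falls back to sgen_alloc_obj_nolock.
 */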
sgen_try_alloc_obj_nolock (GCVTable vtable, size_t size)
	size_t real_size = size;

	size = ALIGN_UP (size);
	SGEN_ASSERT (9, real_size >= SGEN_CLIENT_MINIMUM_OBJECT_SIZE, "Object too small");

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > sgen_tlab_size)) {
		/* Allocate directly from the nursery */
		p = (void **)sgen_nursery_alloc (size);
		if (!p)
			return NULL;

		increment_thread_allocation_counter (size);
		sgen_set_nursery_scan_start ((char*)p);

		/* FIXME: we should use weak memory ops here. Should help especially on x86. */
		zero_tlab_if_necessary (p, size);
	} else {
		int available_in_tlab;

		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = (int)(real_end - (char*)p);	// We'll never have TLABs > 2 GB

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = (void **)sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			increment_thread_allocation_counter (size);
			zero_tlab_if_necessary (p, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);

			new_next = (char *)sgen_nursery_alloc_range (sgen_tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;
			increment_thread_allocation_counter (TLAB_NEXT - TLAB_START);

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			zero_tlab_if_necessary (new_next, alloc_size);
		}
	}
	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	CANARIFY_ALLOC(p,real_size);
	SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	sgen_binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
	g_assert (*p == NULL); /* FIXME: disable this in non-debug builds */
	mono_atomic_store_seq (p, vtable);

	return p;
gboolean sgen_debug_null = 0;
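/*
 * General-purpose allocation entry point: runs the optional per-allocation debug
 * actions, then first tries the lock-free path inside a critical region and only
 * falls back to sgen_alloc_obj_nolock when that attempt fails.
 */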
sgen_alloc_obj (GCVTable vtable, size_t size)

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;

	if (G_UNLIKELY (sgen_has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (sgen_verify_before_allocs) {
			if ((current_alloc % sgen_verify_before_allocs) == 0) {
				LOCK_GC;
				sgen_check_whole_heap_stw ();
				UNLOCK_GC;
			}
		}
		if (sgen_collect_before_allocs) {
			if (((current_alloc % sgen_collect_before_allocs) == 0) && sgen_nursery_section) {
				LOCK_GC;
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				UNLOCK_GC;
			}
		}
	}

	ENTER_CRITICAL_REGION;
	res = sgen_try_alloc_obj_nolock (vtable, size);
	if (res) {
		EXIT_CRITICAL_REGION;
		return res;
	}
	EXIT_CRITICAL_REGION;

	LOCK_GC;
	res = sgen_alloc_obj_nolock (vtable, size);
	UNLOCK_GC;

	return res;
/*
 * To be used for interned strings and possibly MonoThread, reflection handles.
 * We may want to explicitly free these objects.
 */
sgen_alloc_obj_pinned (GCVTable vtable, size_t size)

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		/* large objects are always pinned anyway */
		p = (GCObject *)sgen_los_alloc_large_inner (vtable, size);
	} else {
		SGEN_ASSERT (9, sgen_client_vtable_is_inited (vtable), "class %s:%s is not initialized", sgen_client_vtable_get_namespace (vtable), sgen_client_vtable_get_name (vtable));
		p = sgen_major_collector.alloc_small_pinned_obj (vtable, size, SGEN_VTABLE_HAS_REFERENCES (vtable));
	}

	SGEN_LOG (6, "Allocated pinned object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	increment_thread_allocation_counter (size);
	sgen_binary_protocol_alloc_pinned (p, vtable, size, sgen_client_get_provenance ());

	return p;
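/*
 * Allocate an object directly in the mature (major) heap, bypassing the nursery.
 * Implemented on top of the degraded-allocation path with for_mature == TRUE.
 */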
sgen_alloc_obj_mature (GCVTable vtable, size_t size)

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	res = alloc_degraded (vtable, size, TRUE);

	increment_thread_allocation_counter (size);

	return res;
/*
 * Clear the thread local TLAB variables for all threads.
 */
void
sgen_clear_tlabs (void)
{
	guint64 total_bytes_allocated_globally = 0;

	FOREACH_THREAD_ALL (info) {
		/* A new TLAB will be allocated when the thread does its first allocation */
		info->total_bytes_allocated += info->tlab_next - info->tlab_start;
		total_bytes_allocated_globally += info->total_bytes_allocated;
		info->tlab_start = NULL;
		info->tlab_next = NULL;
		info->tlab_temp_end = NULL;
		info->tlab_real_end = NULL;
	} FOREACH_THREAD_END

	sgen_set_total_bytes_allocated(total_bytes_allocated_globally);
}
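/*
 * One-time allocator initialization: with HEAVY_STATISTICS enabled this registers
 * the allocation counters defined at the top of this file with the Mono counters
 * machinery.
 */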
void
sgen_init_allocator (void)
{
#ifdef HEAVY_STATISTICS
	mono_counters_register ("# objects allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_alloced);
	mono_counters_register ("bytes allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced);
	mono_counters_register ("bytes allocated in LOS", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced_los);
#endif
}

#endif /*HAVE_SGEN_GC*/