/*
 * sgen-alloc.c: Object allocation routines + managed allocators
 *
 * Author:
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License 2.0 as published by the Free Software Foundation;
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License 2.0 along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * ######################################################################
 * ########  Object allocation
 * ######################################################################
 * This section of code deals with allocating memory for objects.
 * There are several ways:
 * *) allocate large objects
 * *) allocate normal objects
 * *) fast lock-free allocation
 * *) allocation of pinned objects
 */

#include "config.h"
#ifdef HAVE_SGEN_GC

#include <string.h>

#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-protocol.h"
#include "mono/sgen/sgen-memory-governor.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"

#define ALIGN_UP		SGEN_ALIGN_UP
#define ALLOC_ALIGN		SGEN_ALLOC_ALIGN
#define MAX_SMALL_OBJ_SIZE	SGEN_MAX_SMALL_OBJ_SIZE

#ifdef HEAVY_STATISTICS
static guint64 stat_objects_alloced = 0;
static guint64 stat_bytes_alloced = 0;
static guint64 stat_bytes_alloced_los = 0;
#endif

/*
 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
 * from nursery fragments.
 * tlab_next is the pointer to the space inside the TLAB where the next object will
 * be allocated.
 * tlab_temp_end is the pointer to the end of the temporary space reserved for
 * the allocation: it allows us to set the scan starts at reasonable intervals.
 * tlab_real_end points to the end of the TLAB.
 *
 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
 * variables for next+temp_end?
 */
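
/*
 * Informally, the allocation code below maintains
 *
 *	tlab_start <= tlab_next <= tlab_temp_end <= tlab_real_end:
 *
 * bumping tlab_next past tlab_temp_end only triggers scan-start
 * bookkeeping, while bumping it past tlab_real_end means the TLAB is
 * exhausted and has to be retired.
 */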

#ifdef HAVE_KW_THREAD
static __thread char *tlab_start;
static __thread char *tlab_next;
static __thread char *tlab_temp_end;
static __thread char *tlab_real_end;
/* Used by the managed allocator/wbarrier */
static __thread char **tlab_next_addr MONO_ATTR_USED;
#endif

#ifdef HAVE_KW_THREAD
#define TLAB_START	tlab_start
#define TLAB_NEXT	tlab_next
#define TLAB_TEMP_END	tlab_temp_end
#define TLAB_REAL_END	tlab_real_end
#else
#define TLAB_START	(__thread_info__->tlab_start)
#define TLAB_NEXT	(__thread_info__->tlab_next)
#define TLAB_TEMP_END	(__thread_info__->tlab_temp_end)
#define TLAB_REAL_END	(__thread_info__->tlab_real_end)
#endif
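
/*
 * In the non-HAVE_KW_THREAD case the TLAB_* macros dereference a local
 * "SgenThreadInfo *__thread_info__"; the TLAB_ACCESS_INIT invocations in
 * the allocation functions below are assumed to declare it (the macro
 * itself lives elsewhere in SGen).
 */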

static GCObject*
alloc_degraded (GCVTable vtable, size_t size, gboolean for_mature)
{
	GCObject *p;

	if (!for_mature) {
		sgen_client_degraded_allocation (size);
		SGEN_ATOMIC_ADD_P (degraded_mode, size);
		sgen_ensure_free_space (size);
	} else {
		if (sgen_need_major_collection (size))
			sgen_perform_collection (size, GENERATION_OLD, "mature allocation failure", !for_mature);
	}

	p = major_collector.alloc_degraded (vtable, size);

	if (!for_mature)
		binary_protocol_alloc_degraded (p, vtable, size, sgen_client_get_provenance ());

	return p;
}

static void
zero_tlab_if_necessary (void *p, size_t size)
{
	if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION || nursery_clear_policy == CLEAR_AT_TLAB_CREATION_DEBUG) {
		memset (p, 0, size);
	} else {
		/*
		 * This function is called for all allocations in
		 * TLABs. TLABs originate from fragments, which are
		 * initialized to be faux arrays. The remainder of
		 * the fragments are zeroed out at initialization for
		 * CLEAR_AT_GC, so here we just need to make sure that
		 * the array header is zeroed. Since we don't know
		 * whether we're called for the start of a fragment or
		 * for somewhere in between, we zero in any case, just
		 * to make sure.
		 */
		sgen_client_zero_array_fill_header (p, size);
	}
}

/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;
	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE);
				if (!degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					/* FIXME: handle this case */
					g_assert_not_reached ();
				}
			}
		} else if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
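			/*
			 * Fast path: allocation is just a pointer bump followed by a
			 * vtable store. This is also, roughly, the sequence the
			 * inlined managed allocators are expected to emit:
			 *
			 *	p = TLAB_NEXT;
			 *	new_next = p + size;
			 *	if (new_next < TLAB_TEMP_END) { TLAB_NEXT = new_next; *p = vtable; }
			 */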

			/*
			 * FIXME: We might need a memory barrier here so the change to tlab_next is
			 * visible before the vtable store.
			 */

			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking again the GC lock when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases, later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* when running in degraded mode, we continue allocing that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE)
				return alloc_degraded (vtable, size, FALSE);
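
			/*
			 * Note that degraded_mode doubles as a byte count:
			 * alloc_degraded () adds each degraded allocation's size to
			 * it, so the check above stops allocating degraded once
			 * roughly a nursery's worth has been handed out that way.
			 */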

			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT);	// We'll never have TLABs > 2Gb
			if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
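				/*
				 * Either the object can never fit in a TLAB, or retiring
				 * the current TLAB now would waste more than
				 * SGEN_MAX_NURSERY_WASTE bytes, so we serve this one
				 * request straight from the nursery and keep the TLAB.
				 */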
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting. Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object. The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size);
					if (!degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}
				if (!p)
					return alloc_degraded (vtable, size, FALSE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (tlab_size);
					if (!degraded_mode)
						p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, FALSE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}

	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}
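
/*
 * Lock-free variant of sgen_alloc_obj_nolock (): instead of collecting
 * or falling back to degraded allocation it returns NULL on every path
 * that cannot be satisfied immediately, so the caller can retry under
 * the GC lock (see sgen_alloc_obj () below).
 */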
GCObject*
sgen_try_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;
	CANARIFY_SIZE(size);

	size = ALIGN_UP (size);
	SGEN_ASSERT (9, real_size >= SGEN_CLIENT_MINIMUM_OBJECT_SIZE, "Object too small");

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > tlab_size)) {
		/* Allocate directly from the nursery */
		p = (void **)sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/* FIXME: we should use weak memory ops here. Should help specially on x86. */
		zero_tlab_if_necessary (p, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = (int)(real_end - (char*)p);	// We'll never have TLABs > 2Gb

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = (void **)sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			zero_tlab_if_necessary (p, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = (char *)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			zero_tlab_if_necessary (new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	CANARIFY_ALLOC(p,real_size);
	SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
	g_assert (*p == NULL); /* FIXME disable this in non debug builds */

	mono_atomic_store_seq (p, vtable);

	return (GCObject*)p;
}

GCObject*
sgen_alloc_obj (GCVTable vtable, size_t size)
{
	GCObject *res;
	TLAB_ACCESS_INIT;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				LOCK_GC;
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE);
				UNLOCK_GC;
			}
		}
	}
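
	/*
	 * First try the lock-free allocator inside a critical region (the
	 * world-stopping code treats threads inside it specially); only if
	 * that fails do we take the GC lock and run the full allocator,
	 * which may collect.
	 */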
	ENTER_CRITICAL_REGION;
	res = sgen_try_alloc_obj_nolock (vtable, size);
	if (res) {
		EXIT_CRITICAL_REGION;
		return res;
	}
	EXIT_CRITICAL_REGION;

	LOCK_GC;
	res = sgen_alloc_obj_nolock (vtable, size);
	UNLOCK_GC;
	return res;
}

/*
 * To be used for interned strings and possibly MonoThread, reflection handles.
 * We may want to explicitly free these objects.
 */
GCObject*
sgen_alloc_obj_pinned (GCVTable vtable, size_t size)
{
	GCObject *p;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;

	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		/* large objects are always pinned anyway */
		p = (GCObject *)sgen_los_alloc_large_inner (vtable, size);
	} else {
		SGEN_ASSERT (9, sgen_client_vtable_is_inited (vtable), "class %s:%s is not initialized", sgen_client_vtable_get_namespace (vtable), sgen_client_vtable_get_name (vtable));
		p = major_collector.alloc_small_pinned_obj (vtable, size, SGEN_VTABLE_HAS_REFERENCES (vtable));
	}
	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated pinned object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc_pinned (p, vtable, size, sgen_client_get_provenance ());
	}
	UNLOCK_GC;
	return p;
}
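
/*
 * Mature allocation goes through the degraded path with for_mature =
 * TRUE, placing the object directly in the old generation and bypassing
 * the nursery and TLABs entirely.
 */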
GCObject*
sgen_alloc_obj_mature (GCVTable vtable, size_t size)
{
	GCObject *res;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;
	res = alloc_degraded (vtable, size, TRUE);
	UNLOCK_GC;

	return res;
}

void
sgen_init_tlab_info (SgenThreadInfo* info)
{
#ifndef HAVE_KW_THREAD
	SgenThreadInfo *__thread_info__ = info;
#endif

	info->tlab_start_addr = &TLAB_START;
	info->tlab_next_addr = &TLAB_NEXT;
	info->tlab_temp_end_addr = &TLAB_TEMP_END;
	info->tlab_real_end_addr = &TLAB_REAL_END;

#ifdef HAVE_KW_THREAD
	tlab_next_addr = &tlab_next;
#endif
}
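
/*
 * The tlab_*_addr fields let generic code such as sgen_clear_tlabs ()
 * below reach every thread's TLAB variables uniformly, whether they live
 * in __thread variables or in the SgenThreadInfo structure.
 */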

/*
 * Clear the thread local TLAB variables for all threads.
 */
void
sgen_clear_tlabs (void)
{
	SgenThreadInfo *info;

	FOREACH_THREAD (info) {
		/* A new TLAB will be allocated when the thread does its first allocation */
		*info->tlab_start_addr = NULL;
		*info->tlab_next_addr = NULL;
		*info->tlab_temp_end_addr = NULL;
		*info->tlab_real_end_addr = NULL;
	} END_FOREACH_THREAD
}

void
sgen_init_allocator (void)
{
#if defined(HAVE_KW_THREAD) && !defined(SGEN_WITHOUT_MONO)
	int tlab_next_addr_offset = -1;
	int tlab_temp_end_offset = -1;

	MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
	MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);

	mono_tls_key_set_offset (TLS_KEY_SGEN_TLAB_NEXT_ADDR, tlab_next_addr_offset);
	mono_tls_key_set_offset (TLS_KEY_SGEN_TLAB_TEMP_END, tlab_temp_end_offset);
#endif

#ifdef HEAVY_STATISTICS
	mono_counters_register ("# objects allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_alloced);
	mono_counters_register ("bytes allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced);
	mono_counters_register ("bytes allocated in LOS", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced_los);
#endif
}

#endif /*HAVE_SGEN_GC*/