/*
 * sgen-stw.c: Stop the world functionality
 *
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License 2.0 as published by the Free Software Foundation;
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License 2.0 along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
30 #include "sgen/sgen-gc.h"
31 #include "sgen/sgen-protocol.h"
32 #include "sgen/sgen-memory-governor.h"
33 #include "sgen/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "sgen/sgen-client.h"
36 #include "metadata/sgen-bridge-internal.h"
37 #include "metadata/gc-internal.h"
39 #define TV_DECLARE SGEN_TV_DECLARE
40 #define TV_GETTIME SGEN_TV_GETTIME
41 #define TV_ELAPSED SGEN_TV_ELAPSED
43 static void sgen_unified_suspend_restart_world (void);
44 static void sgen_unified_suspend_stop_world (void);
46 unsigned int sgen_global_stop_count
= 0;
49 align_pointer (void *ptr
)
52 p
+= sizeof (gpointer
) - 1;
53 p
&= ~ (sizeof (gpointer
) - 1);
58 static MonoContext cur_thread_ctx
;
60 static mword cur_thread_regs
[ARCH_NUM_REGS
];
64 update_current_thread_stack (void *start
)
67 #if !defined(USE_MONO_CTX)
68 void *reg_ptr
= cur_thread_regs
;
70 SgenThreadInfo
*info
= mono_thread_info_current ();
72 info
->client_info
.stack_start
= align_pointer (&stack_guard
);
73 g_assert (info
->client_info
.stack_start
>= info
->client_info
.stack_start_limit
&& info
->client_info
.stack_start
< info
->client_info
.stack_end
);
75 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx
);
76 memcpy (&info
->client_info
.ctx
, &cur_thread_ctx
, sizeof (MonoContext
));
77 if (mono_gc_get_gc_callbacks ()->thread_suspend_func
)
78 mono_gc_get_gc_callbacks ()->thread_suspend_func (info
->client_info
.runtime_data
, NULL
, &info
->client_info
.ctx
);
80 ARCH_STORE_REGS (reg_ptr
);
81 memcpy (&info
->client_info
.regs
, reg_ptr
, sizeof (info
->client_info
.regs
));
82 if (mono_gc_get_gc_callbacks ()->thread_suspend_func
)
83 mono_gc_get_gc_callbacks ()->thread_suspend_func (info
->client_info
.runtime_data
, NULL
, NULL
);
88 is_ip_in_managed_allocator (MonoDomain
*domain
, gpointer ip
)
92 if (!mono_thread_internal_current ())
93 /* Happens during thread attach */
98 if (!sgen_has_critical_method ())
102 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
103 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
104 * to register the jit info for all GC critical methods after they are JITted/loaded.
106 ji
= mono_jit_info_table_find_internal (domain
, ip
, FALSE
, FALSE
);
110 return sgen_is_critical_method (mono_jit_info_get_method (ji
));
114 restart_threads_until_none_in_managed_allocator (void)
116 SgenThreadInfo
*info
;
117 int num_threads_died
= 0;
118 int sleep_duration
= -1;
121 int restart_count
= 0, restarted_count
= 0;
122 /* restart all threads that stopped in the
124 FOREACH_THREAD_SAFE (info
) {
126 if (info
->client_info
.skip
|| info
->client_info
.gc_disabled
|| info
->client_info
.suspend_done
)
128 if (mono_thread_info_is_live (info
) &&
129 (!info
->client_info
.stack_start
|| info
->client_info
.in_critical_region
|| info
->client_info
.info
.inside_critical_region
||
130 is_ip_in_managed_allocator (info
->client_info
.stopped_domain
, info
->client_info
.stopped_ip
))) {
131 binary_protocol_thread_restart ((gpointer
)mono_thread_info_get_tid (info
));
132 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info
->client_info
.info
.native_handle
);
133 result
= sgen_resume_thread (info
);
137 info
->client_info
.skip
= 1;
140 /* we set the stopped_ip to
141 NULL for threads which
142 we're not restarting so
143 that we can easily identify
145 info
->client_info
.stopped_ip
= NULL
;
146 info
->client_info
.stopped_domain
= NULL
;
147 info
->client_info
.suspend_done
= TRUE
;
149 } END_FOREACH_THREAD_SAFE
150 /* if no threads were restarted, we're done */
151 if (restart_count
== 0)
154 /* wait for the threads to signal their restart */
155 sgen_wait_for_suspend_ack (restart_count
);
157 if (sleep_duration
< 0) {
158 mono_thread_info_yield ();
161 g_usleep (sleep_duration
);
162 sleep_duration
+= 10;
165 /* stop them again */
166 FOREACH_THREAD (info
) {
168 if (info
->client_info
.skip
|| info
->client_info
.stopped_ip
== NULL
)
170 result
= sgen_suspend_thread (info
);
175 info
->client_info
.skip
= 1;
178 /* some threads might have died */
179 num_threads_died
+= restart_count
- restarted_count
;
180 /* wait for the threads to signal their suspension
182 sgen_wait_for_suspend_ack (restarted_count
);
185 return num_threads_died
;
189 acquire_gc_locks (void)
192 mono_thread_info_suspend_lock ();
196 release_gc_locks (void)
198 mono_thread_info_suspend_unlock ();
202 static TV_DECLARE (stop_world_time
);
203 static unsigned long max_pause_usec
= 0;
205 static guint64 time_stop_world
;
206 static guint64 time_restart_world
;
208 /* LOCKING: assumes the GC lock is held */
210 sgen_client_stop_world (int generation
)
212 TV_DECLARE (end_handshake
);
214 /* notify the profiler of the leftovers */
215 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
216 if (G_UNLIKELY (mono_profiler_events
& MONO_PROFILE_GC_MOVES
))
217 mono_sgen_gc_event_moves ();
221 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
222 sgen_process_togglerefs ();
224 update_current_thread_stack (&generation
);
226 sgen_global_stop_count
++;
227 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count
, mono_thread_info_current (), (gpointer
)mono_native_thread_id_get ());
228 TV_GETTIME (stop_world_time
);
230 if (mono_thread_info_unified_management_enabled ()) {
231 sgen_unified_suspend_stop_world ();
234 count
= sgen_thread_handshake (TRUE
);
235 dead
= restart_threads_until_none_in_managed_allocator ();
237 g_error ("More threads have died (%d) that been initialy suspended %d", dead
, count
);
240 SGEN_LOG (3, "world stopped");
242 TV_GETTIME (end_handshake
);
243 time_stop_world
+= TV_ELAPSED (stop_world_time
, end_handshake
);
245 sgen_memgov_collection_start (generation
);
246 if (sgen_need_bridge_processing ())
247 sgen_bridge_reset_data ();
250 /* LOCKING: assumes the GC lock is held */
252 sgen_client_restart_world (int generation
, GGTimingInfo
*timing
)
254 SgenThreadInfo
*info
;
256 TV_DECLARE (start_handshake
);
257 TV_DECLARE (end_bridge
);
258 unsigned long usec
, bridge_usec
;
260 /* notify the profiler of the leftovers */
261 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
262 if (G_UNLIKELY (mono_profiler_events
& MONO_PROFILE_GC_MOVES
))
263 mono_sgen_gc_event_moves ();
265 FOREACH_THREAD (info
) {
266 info
->client_info
.stack_start
= NULL
;
268 memset (&info
->client_info
.ctx
, 0, sizeof (MonoContext
));
270 memset (&info
->client_info
.regs
, 0, sizeof (info
->client_info
.regs
));
274 TV_GETTIME (start_handshake
);
276 if (mono_thread_info_unified_management_enabled ())
277 sgen_unified_suspend_restart_world ();
279 sgen_thread_handshake (FALSE
);
282 time_restart_world
+= TV_ELAPSED (start_handshake
, end_sw
);
283 usec
= TV_ELAPSED (stop_world_time
, end_sw
);
284 max_pause_usec
= MAX (usec
, max_pause_usec
);
286 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec
, (int)max_pause_usec
);
289 * We must release the thread info suspend lock after doing
290 * the thread handshake. Otherwise, if the GC stops the world
291 * and a thread is in the process of starting up, but has not
292 * yet registered (it's not in the thread_list), it is
293 * possible that the thread does register while the world is
294 * stopped. When restarting the GC will then try to restart
295 * said thread, but since it never got the suspend signal, it
296 * cannot answer the restart signal, so a deadlock results.
300 TV_GETTIME (end_bridge
);
301 bridge_usec
= TV_ELAPSED (end_sw
, end_bridge
);
304 timing
[0].stw_time
= usec
;
305 timing
[0].bridge_time
= bridge_usec
;
310 mono_sgen_init_stw (void)
312 mono_counters_register ("World stop", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_stop_world
);
313 mono_counters_register ("World restart", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_restart_world
);
/* Unified suspend code */
319 sgen_is_thread_in_current_stw (SgenThreadInfo
*info
)
322 A thread explicitly asked to be skiped because it holds no managed state.
323 This is used by TP and finalizer threads.
324 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
326 if (info
->client_info
.gc_disabled
) {
331 We have detected that this thread is failing/dying, ignore it.
332 FIXME: can't we merge this with thread_is_dying?
334 if (info
->client_info
.skip
) {
339 Suspending the current thread will deadlock us, bad idea.
341 if (info
== mono_thread_info_current ()) {
346 We can't suspend the workers that will do all the heavy lifting.
347 FIXME Use some state bit in SgenThreadInfo for this.
349 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info
))) {
354 The thread has signaled that it started to detach, ignore it.
355 FIXME: can't we merge this with skip
357 if (!mono_thread_info_is_live (info
)) {
365 update_sgen_info (SgenThreadInfo
*info
)
369 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
370 info
->client_info
.stopped_domain
= mono_thread_info_tls_get (info
, TLS_KEY_DOMAIN
);
371 info
->client_info
.stopped_ip
= (gpointer
) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info
)->ctx
);
372 stack_start
= (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info
)->ctx
) - REDZONE_SIZE
;
374 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
375 if (stack_start
< (char*)info
->client_info
.stack_start_limit
|| stack_start
>= (char*)info
->client_info
.stack_end
)
376 g_error ("BAD STACK");
378 info
->client_info
.stack_start
= stack_start
;
380 info
->client_info
.ctx
= mono_thread_info_get_suspend_state (info
)->ctx
;
382 g_assert_not_reached ();
387 sgen_unified_suspend_stop_world (void)
390 SgenThreadInfo
*info
;
391 int sleep_duration
= -1;
393 mono_threads_begin_global_suspend ();
394 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
396 FOREACH_THREAD_SAFE (info
) {
397 info
->client_info
.skip
= FALSE
;
398 info
->client_info
.suspend_done
= FALSE
;
399 if (sgen_is_thread_in_current_stw (info
)) {
400 info
->client_info
.skip
= !mono_thread_info_begin_suspend (info
, FALSE
);
401 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
);
403 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info
), info
->skip
);
405 } END_FOREACH_THREAD_SAFE
407 mono_thread_info_current ()->client_info
.suspend_done
= TRUE
;
408 mono_threads_wait_pending_operations ();
412 FOREACH_THREAD_SAFE (info
) {
413 if (info
->client_info
.suspend_done
|| !sgen_is_thread_in_current_stw (info
)) {
414 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info
), info
->client_info
.suspend_done
, !sgen_is_thread_in_current_stw (info
));
419 All threads that reach here are pristine suspended. This means the following:
421 - We haven't accepted the previous suspend as good.
422 - We haven't gave up on it for this STW (it's either bad or asked not to)
424 if (!mono_threads_core_check_suspend_result (info
)) {
425 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info
));
426 info
->client_info
.skip
= TRUE
;
427 } else if (mono_thread_info_in_critical_location (info
)) {
429 g_assert (mono_thread_info_suspend_count (info
) == 1);
430 res
= mono_thread_info_begin_resume (info
);
431 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info
), res
);
435 info
->client_info
.skip
= TRUE
;
437 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info
));
438 g_assert (!info
->client_info
.in_critical_region
);
439 info
->client_info
.suspend_done
= TRUE
;
441 } END_FOREACH_THREAD_SAFE
443 if (restart_counter
== 0)
445 mono_threads_wait_pending_operations ();
447 if (sleep_duration
< 0) {
455 g_usleep (sleep_duration
);
456 sleep_duration
+= 10;
459 FOREACH_THREAD_SAFE (info
) {
460 if (sgen_is_thread_in_current_stw (info
) && mono_thread_info_is_running (info
)) {
461 gboolean res
= mono_thread_info_begin_suspend (info
, FALSE
);
462 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info
), res
);
464 info
->client_info
.skip
= TRUE
;
466 } END_FOREACH_THREAD_SAFE
468 mono_threads_wait_pending_operations ();
471 FOREACH_THREAD_SAFE (info
) {
472 if (sgen_is_thread_in_current_stw (info
)) {
473 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info
));
474 g_assert (info
->client_info
.suspend_done
);
475 update_sgen_info (info
);
477 g_assert (!info
->client_info
.suspend_done
|| info
== mono_thread_info_current ());
479 } END_FOREACH_THREAD_SAFE
483 sgen_unified_suspend_restart_world (void)
485 SgenThreadInfo
*info
;
487 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
488 FOREACH_THREAD_SAFE (info
) {
489 if (sgen_is_thread_in_current_stw (info
)) {
490 g_assert (mono_thread_info_begin_resume (info
));
491 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info
));
493 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info
));
495 } END_FOREACH_THREAD_SAFE
497 mono_threads_wait_pending_operations ();
498 mono_threads_end_global_suspend ();