3 * Stop the world functionality
6 * Paolo Molaro (lupus@ximian.com)
7 * Rodrigo Kumpera (kumpera@gmail.com)
9 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11 * Copyright 2011 Xamarin, Inc.
12 * Copyright (C) 2012 Xamarin Inc
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
/* Local shorthands for the SGEN_TV_* timing macros used in this file. */
31 #define TV_DECLARE SGEN_TV_DECLARE
32 #define TV_GETTIME SGEN_TV_GETTIME
33 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations; both helpers are defined further down in this file. */
35 static void sgen_unified_suspend_restart_world (void);
36 static void sgen_unified_suspend_stop_world (void);
/*
 * Timestamp of the end of the most recent world restart. It is assigned in
 * sgen_client_restart_world and compared against 0 in mono_time_since_last_stw.
 */
38 static TV_DECLARE (end_of_last_stw
);
/*
 * mono_time_since_last_stw:
 *
 * Returns the TV_ELAPSED interval between end_of_last_stw and the current
 * time sampled with TV_GETTIME.
 *
 * NOTE(review): the statement guarded by the end_of_last_stw == 0 test is not
 * visible in this listing; presumably it returns early when no restart has
 * been recorded yet. Confirm against the full source.
 * The unit is whatever TV_ELAPSED yields; the SGEN_LOG calls in this file
 * divide such values by 10 before printing them as usec.
 */
40 guint64
mono_time_since_last_stw ()
42 if (end_of_last_stw
== 0)
45 TV_DECLARE (current_time
);
46 TV_GETTIME (current_time
);
47 return TV_ELAPSED (end_of_last_stw
, current_time
);
/*
 * Number of stop-the-world requests so far. It is incremented once per call
 * to sgen_client_stop_world, before the suspend handshake, and printed in
 * that function's log line.
 */
50 unsigned int sgen_global_stop_count
= 0;
/*
 * align_pointer:
 * @ptr: address to align
 *
 * Rounds an address up to the next multiple of sizeof (gpointer): the visible
 * arithmetic adds sizeof (gpointer) - 1 and then clears the low bits with the
 * complement of that same mask.
 *
 * NOTE(review): the declaration of p (presumably an integer copy of @ptr) and
 * the return statement are not visible in this listing.
 */
53 align_pointer (void *ptr
)
56 p
+= sizeof (gpointer
) - 1;
57 p
&= ~ (sizeof (gpointer
) - 1);
/*
 * update_current_thread_stack:
 * @start: not referenced by any visible line of this function
 *
 * Records the stack position and register context of the calling thread in
 * its SgenThreadInfo, then calls the thread_suspend_func GC callback if one
 * is installed.
 *
 * NOTE(review): the declaration of stack_guard is not visible in this
 * listing; presumably it is a local whose address marks the current stack
 * position. Confirm against the full source.
 */
62 update_current_thread_stack (void *start
)
65 SgenThreadInfo
*info
= mono_thread_info_current ();
/* Use the pointer-aligned address of stack_guard as the stack start of this thread. */
67 info
->client_info
.stack_start
= align_pointer (&stack_guard
);
68 g_assert (info
->client_info
.stack_start
);
/* The recorded start must lie in [stack_start_limit, stack_end). */
69 g_assert (info
->client_info
.stack_start
>= info
->client_info
.info
.stack_start_limit
&& info
->client_info
.stack_start
< info
->client_info
.info
.stack_end
);
/*
 * Capture the current register context when MonoContext is available.
 * NOTE(review): the lines between the #elif and the g_error call, and the
 * closing #endif, are not visible here, so which branch owns g_error cannot
 * be determined from this listing.
 */
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72 MONO_CONTEXT_GET_CURRENT (info
->client_info
.ctx
);
73 #elif defined (HOST_WASM)
76 g_error ("Sgen STW requires a working mono-context");
/* The callback is optional; it receives the runtime data, a NULL second argument and the context. */
79 if (mono_gc_get_gc_callbacks ()->thread_suspend_func
)
80 mono_gc_get_gc_callbacks ()->thread_suspend_func (info
->client_info
.runtime_data
, NULL
, &info
->client_info
.ctx
);
/*
 * acquire_gc_locks:
 *
 * Takes the thread-info suspend lock via mono_thread_info_suspend_lock.
 * NOTE(review): some original lines of this function are missing from this
 * listing, so it may do more than the single visible call.
 */
84 acquire_gc_locks (void)
87 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 * Releases the thread-info suspend lock via mono_thread_info_suspend_unlock.
 * NOTE(review): some original lines of this function are missing from this
 * listing, so it may do more than the single visible call.
 */
91 release_gc_locks (void)
93 mono_thread_info_suspend_unlock ();
/* Timestamp taken in sgen_client_stop_world just before the suspend handshake starts. */
97 static TV_DECLARE (stop_world_time
);
/* Largest pause (stop_world_time to end of restart) seen so far; updated in sgen_client_restart_world. */
98 static unsigned long max_stw_pause_time
= 0;
/* Accumulated time spent in the stop handshake; registered as the World stop counter. */
100 static guint64 time_stop_world
;
/* Accumulated time spent in the restart handshake; registered as the World restart counter. */
101 static guint64 time_restart_world
;
103 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_stop_world:
 * @generation: forwarded to the profiler events and to sgen_memgov_collection_start
 * @serial_collection: forwarded to the profiler events
 *
 * Stops the world on behalf of the collector. The visible sequence is: raise
 * the PRE_STOP_WORLD and PRE_STOP_WORLD_LOCKED profiler events, record the
 * stack and context of the calling thread, process toggle refs, bump
 * sgen_global_stop_count, run the unified suspend handshake, raise
 * POST_STOP_WORLD, account the handshake time, notify the memory governor and
 * reset bridge data when bridge processing is needed.
 *
 * NOTE(review): original lines between the two PRE_STOP_WORLD events are
 * missing from this listing; presumably that is where the GC locks are taken
 * (see acquire_gc_locks). Confirm against the full source.
 */
105 sgen_client_stop_world (int generation
, gboolean serial_collection
)
107 TV_DECLARE (end_handshake
);
109 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_PRE_STOP_WORLD
, generation
, serial_collection
));
113 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED
, generation
, serial_collection
));
/* The address of a parameter is passed as the start argument. */
115 update_current_thread_stack (&generation
);
117 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
118 sgen_process_togglerefs ();
120 sgen_global_stop_count
++;
121 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count
, mono_thread_info_current (), (gpointer
) (gsize
) mono_native_thread_id_get ());
/* Start of the pause; sgen_client_restart_world measures the whole pause from this timestamp. */
122 TV_GETTIME (stop_world_time
);
124 sgen_unified_suspend_stop_world ();
126 SGEN_LOG (3, "world stopped");
128 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_POST_STOP_WORLD
, generation
, serial_collection
));
130 TV_GETTIME (end_handshake
);
/* Handshake duration: logged divided by 10 as usec, and accumulated undivided into time_stop_world. */
132 unsigned long stop_world_tv_elapsed
= TV_ELAPSED (stop_world_time
, end_handshake
);
133 SGEN_LOG (2, "stopping world (time: %d usec)", (int)stop_world_tv_elapsed
/ 10);
134 time_stop_world
+= stop_world_tv_elapsed
;
136 sgen_memgov_collection_start (generation
);
137 if (sgen_need_bridge_processing ())
138 sgen_bridge_reset_data ();
141 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_restart_world:
 * @generation: forwarded to the profiler events
 * @serial_collection: forwarded to the profiler events
 * @stw_time: out parameter; receives the pause time measured from
 * stop_world_time to end_sw. It is written without a NULL check in the
 * visible code.
 *
 * Restarts the world after a collection. The visible sequence is: flush
 * pending gc_moves and gc_resize profiler data when those events are enabled,
 * raise PRE_START_WORLD, clear the recorded stack_start and ctx of every
 * thread, run the unified resume handshake, account the restart and pause
 * times, update max_stw_pause_time and end_of_last_stw, raise
 * POST_START_WORLD and POST_START_WORLD_UNLOCKED, and store the pause time
 * through @stw_time.
 *
 * NOTE(review): end_sw is used below, but its declaration and the call that
 * samples it are not visible in this listing. The end of the FOREACH_THREAD_ALL
 * loop and the lock release that the trailing prose refers to are not visible
 * either. Confirm against the full source.
 */
143 sgen_client_restart_world (int generation
, gboolean serial_collection
, gint64
*stw_time
)
146 TV_DECLARE (start_handshake
);
148 /* notify the profiler of the leftovers */
149 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
150 if (MONO_PROFILER_ENABLED (gc_moves
))
151 mono_sgen_gc_event_moves ();
153 if (MONO_PROFILER_ENABLED (gc_resize
))
154 mono_sgen_gc_event_resize ();
156 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_PRE_START_WORLD
, generation
, serial_collection
));
/* Drop the per-thread stack start and saved context recorded while the world was stopped. */
158 FOREACH_THREAD_ALL (info
) {
159 info
->client_info
.stack_start
= NULL
;
160 memset (&info
->client_info
.ctx
, 0, sizeof (MonoContext
));
163 TV_GETTIME (start_handshake
);
165 sgen_unified_suspend_restart_world ();
/* Restart handshake duration: logged divided by 10 as usec, accumulated undivided. */
169 unsigned long restart_world_tv_elapsed
= TV_ELAPSED (start_handshake
, end_sw
);
170 SGEN_LOG (2, "restarting world (time: %d usec)", (int)restart_world_tv_elapsed
/ 10);
171 time_restart_world
+= restart_world_tv_elapsed
;
/* Whole pause, measured from the timestamp taken in sgen_client_stop_world. */
173 unsigned long stw_pause_time
= TV_ELAPSED (stop_world_time
, end_sw
);
174 max_stw_pause_time
= MAX (stw_pause_time
, max_stw_pause_time
);
175 end_of_last_stw
= end_sw
;
177 SGEN_LOG (1, "restarted (pause time: %d usec, max: %d usec)", (int)stw_pause_time
/ 10, (int)max_stw_pause_time
/ 10);
179 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_POST_START_WORLD
, generation
, serial_collection
));
182 * We must release the thread info suspend lock after doing
183 * the thread handshake. Otherwise, if the GC stops the world
184 * and a thread is in the process of starting up, but has not
185 * yet registered (it's not in the thread_list), it is
186 * possible that the thread does register while the world is
187 * stopped. When restarting the GC will then try to restart
188 * said thread, but since it never got the suspend signal, it
189 * cannot answer the restart signal, so a deadlock results.
193 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED
, generation
, serial_collection
));
195 *stw_time
= stw_pause_time
;
/*
 * mono_sgen_init_stw:
 *
 * Registers the two accumulated handshake timers, time_stop_world and
 * time_restart_world, with mono_counters_register under the names
 * World stop and World restart, flagged as GC time counters of kind ULONG.
 */
199 mono_sgen_init_stw (void)
201 mono_counters_register ("World stop", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_stop_world
);
202 mono_counters_register ("World restart", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_restart_world
);
205 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify
 * @reason: out parameter; no visible line writes to it
 *
 * Decides whether @info takes part in the current stop-the-world. The visible
 * tests, in order, are: the skip flag of the thread is set, the thread is the
 * calling thread, the thread is an sgen thread pool thread, and the thread is
 * no longer live. Callers in this file treat a false result as meaning the
 * thread is ignored.
 *
 * NOTE(review): the body of every test (return value and the value stored
 * through @reason) and the final return are not visible in this listing.
 * Some callers pass NULL for @reason, so the elided code presumably tolerates
 * NULL. Confirm against the full source.
 */
208 sgen_is_thread_in_current_stw (SgenThreadInfo
*info
, int *reason
)
211 * No need to check MONO_THREAD_INFO_FLAGS_NO_GC here as we rely on the
212 * FOREACH_THREAD_EXCLUDE macro to skip such threads for us.
216 We have detected that this thread is failing/dying, ignore it.
217 FIXME: can't we merge this with thread_is_dying?
219 if (info
->client_info
.skip
) {
226 Suspending the current thread will deadlock us, bad idea.
228 if (info
== mono_thread_info_current ()) {
235 We can't suspend the workers that will do all the heavy lifting.
236 FIXME Use some state bit in SgenThreadInfo for this.
238 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info
))) {
245 The thread has signaled that it started to detach, ignore it.
246 FIXME: can't we merge this with skip
248 if (!mono_thread_info_is_live (info
)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 * Suspends every thread that takes part in the current stop-the-world and
 * records the context needed to scan its stack. All thread walks use
 * FOREACH_THREAD_EXCLUDE with MONO_THREAD_INFO_FLAGS_NO_GC. Visible stages:
 *
 * 1. Assert that the calling thread has not promised to avoid safepoints,
 *    then call mono_threads_begin_global_suspend.
 * 2. Phased suspend: for each phase from MONO_THREAD_SUSPEND_PHASE_INITIAL up
 *    to MONO_THREAD_SUSPEND_PHASE_COUNT, walk the threads. In the initial
 *    phase the skip and suspend_done flags are reset. Threads accepted by
 *    sgen_is_thread_in_current_stw get mono_thread_info_begin_suspend: a
 *    SUSPENDED result clears skip, a SKIP result sets it, and a NEXT_PHASE
 *    result requests another phase. The calling thread is marked suspend_done
 *    and pending operations are awaited.
 * 3. Restart pass: threads that are not suspend_done and are in the STW are
 *    checked with mono_thread_info_in_critical_location. Those outside a
 *    critical location are marked suspend_done. The remaining visible lines
 *    require a suspend count of 1 (g_error otherwise), pulse-resume the
 *    thread while requesting a new suspension, set skip when that call
 *    fails, and count the successful restarts.
 * 4. After waiting for pending operations the code tests restart_counter
 *    against 0, then yields while sleep_duration is negative; the visible
 *    lines also call g_usleep and grow sleep_duration by 10.
 * 5. Mop-up pass: remaining candidates that are not running are ignored; the
 *    others get mono_thread_info_begin_suspend with the MOPUP phase, where
 *    NEXT_PHASE is asserted to be impossible.
 * 6. Final pass: for threads not in the STW, assert that suspend_done is
 *    unset unless the thread is the caller. For the others, assert
 *    suspend_done, copy the context from the suspend state, set stack_start
 *    to the saved stack pointer minus REDZONE_SIZE, reset it to NULL when it
 *    falls outside [stack_start_limit, stack_end), and log the stopped
 *    instruction pointer to the binary protocol.
 *
 * NOTE(review): this listing omits the declarations of reason, suspend_count
 * and stopped_ip, all continue/break/default lines, the closing braces, and
 * whatever loop encloses stages 3 to 5. The exact control flow must be
 * confirmed against the full source.
 */
258 sgen_unified_suspend_stop_world (void)
/* A negative value selects a plain yield in the wait step further down. */
260 int sleep_duration
= -1;
262 // we can't lead STW if we promised not to safepoint.
263 g_assert (!mono_thread_info_will_not_safepoint (mono_thread_info_current ()));
265 mono_threads_begin_global_suspend ();
266 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
/* Phased suspend: one walk over the thread list per suspend phase. */
268 for (MonoThreadSuspendPhase phase
= MONO_THREAD_SUSPEND_PHASE_INITIAL
; phase
< MONO_THREAD_SUSPEND_PHASE_COUNT
; phase
++) {
269 gboolean need_next_phase
= FALSE
;
270 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
271 /* look at every thread in the first phase. */
272 if (phase
== MONO_THREAD_SUSPEND_PHASE_INITIAL
) {
273 info
->client_info
.skip
= FALSE
;
274 info
->client_info
.suspend_done
= FALSE
;
276 /* skip threads suspended by previous phase. */
277 /* threads with info->client_info.skip set to TRUE will be skipped by sgen_is_thread_in_current_stw. */
278 if (info
->client_info
.suspend_done
)
283 if (!sgen_is_thread_in_current_stw (info
, &reason
)) {
284 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND-%d] IGNORE thread %p skip %s reason %d\n", (int)phase
, mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false", reason
);
/* Translate the result of the suspend request into the skip flag or a request for another phase. */
288 switch (mono_thread_info_begin_suspend (info
, phase
)) {
289 case MONO_THREAD_BEGIN_SUSPEND_SUSPENDED
:
290 info
->client_info
.skip
= FALSE
;
292 case MONO_THREAD_BEGIN_SUSPEND_SKIP
:
293 info
->client_info
.skip
= TRUE
;
295 case MONO_THREAD_BEGIN_SUSPEND_NEXT_PHASE
:
296 need_next_phase
= TRUE
;
299 g_assert_not_reached ();
302 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND-%d] SUSPEND thread %p skip %s\n", (int)phase
, mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
303 } FOREACH_THREAD_END
;
305 mono_thread_info_current ()->client_info
.suspend_done
= TRUE
;
306 mono_threads_wait_pending_operations ();
308 if (!need_next_phase
)
/* Counts the threads that were pulse-resumed in the pass below. */
313 gint restart_counter
= 0;
315 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
319 if (info
->client_info
.suspend_done
|| !sgen_is_thread_in_current_stw (info
, &reason
)) {
320 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info
), info
->client_info
.suspend_done
, !sgen_is_thread_in_current_stw (info
, NULL
), reason
);
325 All threads that reach here are pristine suspended. This means the following:
327 - We haven't accepted the previous suspend as good.
328 - We haven't gave up on it for this STW (it's either bad or asked not to)
330 if (!mono_thread_info_in_critical_location (info
)) {
331 info
->client_info
.suspend_done
= TRUE
;
333 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info
));
337 suspend_count
= mono_thread_info_suspend_count (info
);
338 if (!(suspend_count
== 1))
339 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info
), suspend_count
);
341 info
->client_info
.skip
= !mono_thread_info_begin_pulse_resume_and_request_suspension (info
);
342 if (!info
->client_info
.skip
)
343 restart_counter
+= 1;
345 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
348 mono_threads_wait_pending_operations ();
350 if (restart_counter
== 0)
353 if (sleep_duration
< 0) {
354 mono_thread_info_yield ();
357 g_usleep (sleep_duration
);
358 sleep_duration
+= 10;
361 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
363 if (info
->client_info
.suspend_done
|| !sgen_is_thread_in_current_stw (info
, &reason
)) {
364 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info
), info
->client_info
.suspend_done
, !sgen_is_thread_in_current_stw (info
, NULL
), reason
);
368 if (!mono_thread_info_is_running (info
)) {
369 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info
));
373 switch (mono_thread_info_begin_suspend (info
, MONO_THREAD_SUSPEND_PHASE_MOPUP
)) {
374 case MONO_THREAD_BEGIN_SUSPEND_SUSPENDED
:
375 info
->client_info
.skip
= FALSE
;
377 case MONO_THREAD_BEGIN_SUSPEND_SKIP
:
378 info
->client_info
.skip
= TRUE
;
380 case MONO_THREAD_BEGIN_SUSPEND_NEXT_PHASE
:
381 g_assert_not_reached ();
383 g_assert_not_reached ();
386 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
389 mono_threads_wait_pending_operations ();
392 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
396 if (!sgen_is_thread_in_current_stw (info
, &reason
)) {
397 g_assert (!info
->client_info
.suspend_done
|| info
== mono_thread_info_current ());
399 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info
), reason
);
403 g_assert (info
->client_info
.suspend_done
);
405 info
->client_info
.ctx
= mono_thread_info_get_suspend_state (info
)->ctx
;
407 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
408 info
->client_info
.stack_start
= (gpointer
) ((char*)MONO_CONTEXT_GET_SP (&info
->client_info
.ctx
) - REDZONE_SIZE
);
410 if (info
->client_info
.stack_start
< info
->client_info
.info
.stack_start_limit
411 || info
->client_info
.stack_start
>= info
->client_info
.info
.stack_end
) {
413 * Thread context is in unhandled state, most likely because it is
414 * dying. We don't scan it.
415 * FIXME We should probably rework and check the valid flag instead.
417 info
->client_info
.stack_start
= NULL
;
420 stopped_ip
= (gpointer
) (MONO_CONTEXT_GET_IP (&info
->client_info
.ctx
));
422 sgen_binary_protocol_thread_suspend ((gpointer
) mono_thread_info_get_tid (info
), stopped_ip
);
424 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
425 mono_thread_info_get_tid (info
), stopped_ip
, info
->client_info
.stack_start
, info
->client_info
.stack_start
? info
->client_info
.info
.stack_end
: NULL
);
/*
 * sgen_unified_suspend_restart_world:
 *
 * Resumes the threads suspended for the current stop-the-world. It walks all
 * threads except those flagged MONO_THREAD_INFO_FLAGS_NO_GC; each thread
 * accepted by sgen_is_thread_in_current_stw is resumed with
 * mono_thread_info_begin_resume and logged to the binary protocol, and the
 * other threads only produce a debug line. It then waits for pending
 * operations and ends the global suspend.
 *
 * NOTE(review): the declaration of reason, the else line and the closing
 * braces are not visible in this listing.
 */
430 sgen_unified_suspend_restart_world (void)
432 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
433 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
435 if (sgen_is_thread_in_current_stw (info
, &reason
)) {
/*
 * NOTE(review): the resume call is the argument of g_assert, so the resume
 * only happens if g_assert evaluates its argument. Confirm that this holds
 * for every build configuration.
 */
436 g_assert (mono_thread_info_begin_resume (info
));
437 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info
));
439 sgen_binary_protocol_thread_restart ((gpointer
) mono_thread_info_get_tid (info
));
441 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info
), reason
);
445 mono_threads_wait_pending_operations ();
446 mono_threads_end_global_suspend ();