3 * Stop the world functionality
6 * Paolo Molaro (lupus@ximian.com)
7 * Rodrigo Kumpera (kumpera@gmail.com)
9 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11 * Copyright 2011 Xamarin, Inc.
12 * Copyright (C) 2012 Xamarin Inc
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
/* Short local aliases for the SGEN_TV_* timing macros used throughout this file. */
31 #define TV_DECLARE SGEN_TV_DECLARE
32 #define TV_GETTIME SGEN_TV_GETTIME
33 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations: called by sgen_client_stop_world / sgen_client_restart_world, defined later in this file. */
35 static void sgen_unified_suspend_restart_world (void);
36 static void sgen_unified_suspend_stop_world (void);
/* Timestamp assigned in sgen_client_restart_world and read by mono_time_since_last_stw. */
38 static TV_DECLARE (end_of_last_stw
);
/*
 * Returns TV_ELAPSED between end_of_last_stw and a freshly taken timestamp.
 *
 * end_of_last_stw is first compared against 0.
 * NOTE(review): the statement guarded by that comparison is not visible in
 * this view -- presumably an early return for the "no timestamp recorded
 * yet" case; confirm against the full file.
 */
40 guint64
mono_time_since_last_stw ()
42 if (end_of_last_stw
== 0)
/* Take the current time and measure the distance from the recorded timestamp. */
45 TV_DECLARE (current_time
);
46 TV_GETTIME (current_time
);
47 return TV_ELAPSED (end_of_last_stw
, current_time
);
/* Counter incremented by sgen_client_stop_world each time it is called; also included in its log message. */
50 unsigned int sgen_global_stop_count
= 0;
/*
 * Pointer alignment helper.
 *
 * The visible arithmetic adds sizeof (gpointer) - 1 to p and then clears the
 * low bits with the complementary mask, i.e. it rounds p up to the next
 * multiple of sizeof (gpointer).
 * NOTE(review): the declaration of p and the return statement are not
 * visible in this view; presumably p is ptr converted to an integer and the
 * rounded value is returned as a pointer -- confirm.
 */
53 align_pointer (void *ptr
)
56 p
+= sizeof (gpointer
) - 1;
57 p
&= ~ (sizeof (gpointer
) - 1);
/*
 * Fills in client_info.stack_start and client_info.ctx for the calling
 * thread (mono_thread_info_current ()), then forwards that context to the
 * thread_suspend_func GC callback when one is installed.
 *
 * start: not referenced by any statement visible in this view.
 * NOTE(review): the declaration of stack_guard is not visible here;
 * presumably it is a local whose address marks the current stack position.
 */
62 update_current_thread_stack (void *start
)
65 SgenThreadInfo
*info
= mono_thread_info_current ();
/* Record the aligned address of stack_guard as this thread's stack start. */
67 info
->client_info
.stack_start
= align_pointer (&stack_guard
);
68 g_assert (info
->client_info
.stack_start
);
/* The recorded address must fall inside [stack_start_limit, stack_end). */
69 g_assert (info
->client_info
.stack_start
>= info
->client_info
.info
.stack_start_limit
&& info
->client_info
.stack_start
< info
->client_info
.info
.stack_end
);
/*
 * Capture the current register context when not cross compiling and the
 * architecture provides a MonoContext.
 * NOTE(review): the HOST_WASM branch body and the directive guarding the
 * g_error line are not visible in this view.
 */
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72 MONO_CONTEXT_GET_CURRENT (info
->client_info
.ctx
);
73 #elif defined (HOST_WASM)
76 g_error ("Sgen STW requires a working mono-context");
/* The callback is optional: only invoke it when the callbacks table provides one. */
79 if (mono_gc_get_gc_callbacks ()->thread_suspend_func
)
80 mono_gc_get_gc_callbacks ()->thread_suspend_func (info
->client_info
.runtime_data
, NULL
, &info
->client_info
.ctx
);
/*
 * Takes the thread-info suspend lock via mono_thread_info_suspend_lock ().
 * NOTE(review): only this one statement is visible in this view; the
 * function may take further locks in lines not shown.
 */
84 acquire_gc_locks (void)
87 mono_thread_info_suspend_lock ();
/*
 * Releases the thread-info suspend lock via mono_thread_info_suspend_unlock ().
 * NOTE(review): only this one statement is visible in this view; the
 * function may release further locks in lines not shown.
 */
91 release_gc_locks (void)
93 mono_thread_info_suspend_unlock ();
/* Timestamp taken in sgen_client_stop_world right before the suspend handshake; sgen_client_restart_world measures the pause from it. */
97 static TV_DECLARE (stop_world_time
);
/* Largest pause value computed so far; updated with MAX () in sgen_client_restart_world. */
98 static unsigned long max_pause_usec
= 0;
/* Accumulated TV_ELAPSED of the stop handshake; exposed as the "World stop" counter by mono_sgen_init_stw. */
100 static guint64 time_stop_world
;
/* Accumulated TV_ELAPSED of the restart handshake; exposed as the "World restart" counter by mono_sgen_init_stw. */
101 static guint64 time_restart_world
;
/*
 * Stops the world on behalf of a collection.
 *
 * Visible sequence: raise the PRE_STOP_WORLD and PRE_STOP_WORLD_LOCKED
 * profiler events, record the calling thread's stack/context, process
 * togglerefs, bump sgen_global_stop_count, timestamp, run the unified
 * suspend handshake, raise POST_STOP_WORLD, add the handshake duration to
 * time_stop_world, notify the memory governor and, when bridge processing
 * is needed, reset the bridge data.
 *
 * generation, serial_collection: forwarded to the profiler events;
 * generation is also passed to sgen_memgov_collection_start.
 *
 * NOTE(review): no lock acquisition is visible between the PRE_STOP_WORLD
 * and PRE_STOP_WORLD_LOCKED events in this view; presumably
 * acquire_gc_locks () is called there -- confirm against the full file.
 */
103 /* LOCKING: assumes the GC lock is held */
105 sgen_client_stop_world (int generation
, gboolean serial_collection
)
107 TV_DECLARE (end_handshake
);
109 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_PRE_STOP_WORLD
, generation
, serial_collection
));
113 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED
, generation
, serial_collection
));
/* Record this thread's own stack start and register context. */
115 update_current_thread_stack (&generation
);
117 /* We start to scan after the locks are taken; this ensures we won't be interrupted. */
118 sgen_process_togglerefs ();
120 sgen_global_stop_count
++;
121 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count
, mono_thread_info_current (), (gpointer
) (gsize
) mono_native_thread_id_get ());
/* Start of the pause measurement; sgen_client_restart_world reads this timestamp again. */
122 TV_GETTIME (stop_world_time
);
124 sgen_unified_suspend_stop_world ();
126 SGEN_LOG (3, "world stopped");
128 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_POST_STOP_WORLD
, generation
, serial_collection
));
/* Account the time spent in the suspend handshake. */
130 TV_GETTIME (end_handshake
);
131 time_stop_world
+= TV_ELAPSED (stop_world_time
, end_handshake
);
133 sgen_memgov_collection_start (generation
);
134 if (sgen_need_bridge_processing ())
135 sgen_bridge_reset_data ();
/*
 * Restarts the world after a stop.
 *
 * Visible sequence: report pending gc_moves / gc_resize data to the profiler
 * when those probes are enabled, raise PRE_START_WORLD, clear every thread's
 * recorded stack start and context, run the unified resume handshake, update
 * the timing statistics (time_restart_world, max_pause_usec,
 * end_of_last_stw), log the pause, and raise POST_START_WORLD followed by
 * POST_START_WORLD_UNLOCKED.
 *
 * generation, serial_collection: forwarded to the profiler events.
 * stw_time: NOTE(review): not written by any statement visible in this view;
 * presumably receives the pause time -- confirm.
 *
 * NOTE(review): end_sw and usec are used below, but their declarations and
 * the statement that sets end_sw are not visible here. Likewise no lock
 * release is visible between the two POST_START_WORLD events.
 */
138 /* LOCKING: assumes the GC lock is held */
140 sgen_client_restart_world (int generation
, gboolean serial_collection
, gint64
*stw_time
)
143 TV_DECLARE (start_handshake
);
146 /* notify the profiler of the leftovers */
147 /* FIXME this is the wrong spot, as we can STW for non-collection reasons. */
148 if (MONO_PROFILER_ENABLED (gc_moves
))
149 mono_sgen_gc_event_moves ();
151 if (MONO_PROFILER_ENABLED (gc_resize
))
152 mono_sgen_gc_event_resize ();
154 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_PRE_START_WORLD
, generation
, serial_collection
));
/* Clear the per-thread stack start and saved context before threads run again. */
156 FOREACH_THREAD_ALL (info
) {
157 info
->client_info
.stack_start
= NULL
;
158 memset (&info
->client_info
.ctx
, 0, sizeof (MonoContext
));
161 TV_GETTIME (start_handshake
);
163 sgen_unified_suspend_restart_world ();
/* Restart handshake duration, measured up to end_sw. */
166 time_restart_world
+= TV_ELAPSED (start_handshake
, end_sw
);
/* Whole pause: from the timestamp taken in sgen_client_stop_world up to end_sw. */
167 usec
= TV_ELAPSED (stop_world_time
, end_sw
);
168 max_pause_usec
= MAX (usec
, max_pause_usec
);
/* Remembered for mono_time_since_last_stw. */
169 end_of_last_stw
= end_sw
;
171 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec
, (int)max_pause_usec
);
173 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_POST_START_WORLD
, generation
, serial_collection
));
176 * We must release the thread info suspend lock after doing
177 * the thread handshake. Otherwise, if the GC stops the world
178 * and a thread is in the process of starting up, but has not
179 * yet registered (it's not in the thread_list), it is
180 * possible that the thread does register while the world is
181 * stopped. When restarting the GC will then try to restart
182 * said thread, but since it never got the suspend signal, it
183 * cannot answer the restart signal, so a deadlock results.
187 MONO_PROFILER_RAISE (gc_event
, (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED
, generation
, serial_collection
));
/*
 * Registers the accumulated stop and restart handshake timings
 * (time_stop_world, time_restart_world) with mono_counters_register under
 * the names "World stop" and "World restart", flagged as GC / ULONG / TIME
 * counters.
 */
193 mono_sgen_init_stw (void)
195 mono_counters_register ("World stop", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_stop_world
);
196 mono_counters_register ("World restart", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_restart_world
);
199 /* Unified suspend code */
/*
 * Decides whether the thread described by info takes part in the current
 * stop-the-world. Callers in this file treat a false result as "ignore this
 * thread" and log *reason.
 *
 * Four conditions are tested, in this order: the thread's skip flag, the
 * thread being the calling thread, the thread being a thread-pool thread
 * (sgen_thread_pool_is_thread_pool_thread), and the thread no longer being
 * live (mono_thread_info_is_live).
 *
 * info:   thread to classify.
 * reason: out-parameter; some callers in this file pass NULL, so
 *         presumably it is optional -- TODO confirm.
 *
 * NOTE(review): the bodies of the four branches (the values stored through
 * reason and the return statements) are not visible in this view.
 */
202 sgen_is_thread_in_current_stw (SgenThreadInfo
*info
, int *reason
)
205 * No need to check MONO_THREAD_INFO_FLAGS_NO_GC here as we rely on the
206 * FOREACH_THREAD_EXCLUDE macro to skip such threads for us.
210 We have detected that this thread is failing/dying, ignore it.
211 FIXME: can't we merge this with thread_is_dying?
213 if (info
->client_info
.skip
) {
220 Suspending the current thread will deadlock us, bad idea.
222 if (info
== mono_thread_info_current ()) {
229 We can't suspend the workers that will do all the heavy lifting.
230 FIXME Use some state bit in SgenThreadInfo for this.
232 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info
))) {
239 The thread has signaled that it started to detach, ignore it.
240 FIXME: can't we merge this with skip
242 if (!mono_thread_info_is_live (info
)) {
/*
 * Suspend handshake used by sgen_client_stop_world.
 *
 * Visible structure:
 *  1. Begin a global suspend and, for each MonoThreadSuspendPhase, ask every
 *     thread not flagged MONO_THREAD_INFO_FLAGS_NO_GC to suspend, then wait
 *     for the pending operations.
 *  2. Restart pass: threads that are not in a critical location are marked
 *     suspend_done; the others are pulse-resumed with a new suspension
 *     request and counted in restart_counter. After waiting and backing off
 *     (yield or g_usleep), the restarted threads are suspended again with
 *     the MOPUP phase.
 *  3. Final pass: for every thread taking part in this stop-the-world, copy
 *     the suspend-state context into client_info.ctx, derive
 *     client_info.stack_start from the stack pointer, and log the suspend
 *     to the binary protocol.
 *
 * NOTE(review): loop headers and terminators, continue/break statements and
 * the declarations of reason, suspend_count and stopped_ip are not visible
 * in this view; the comments below hedge wherever they depend on them.
 */
252 sgen_unified_suspend_stop_world (void)
/* Back-off state for the restart pass: a negative value selects yield instead of sleep. */
254 int sleep_duration
= -1;
256 // we can't lead STW if we promised not to safepoint.
257 g_assert (!mono_thread_info_will_not_safepoint (mono_thread_info_current ()));
259 mono_threads_begin_global_suspend ();
260 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
/* Phase loop: issue suspend requests to the threads, one suspend phase at a time. */
262 for (MonoThreadSuspendPhase phase
= MONO_THREAD_SUSPEND_PHASE_INITIAL
; phase
< MONO_THREAD_SUSPEND_PHASE_COUNT
; phase
++) {
263 gboolean need_next_phase
= FALSE
;
264 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
265 /* look at every thread in the first phase. */
266 if (phase
== MONO_THREAD_SUSPEND_PHASE_INITIAL
) {
267 info
->client_info
.skip
= FALSE
;
268 info
->client_info
.suspend_done
= FALSE
;
270 /* skip threads suspended by previous phase. */
271 /* threads with info->client_info->skip set to TRUE will be skipped by sgen_is_thread_in_current_stw. */
272 if (info
->client_info
.suspend_done
)
277 if (!sgen_is_thread_in_current_stw (info
, &reason
)) {
278 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND-%d] IGNORE thread %p skip %s reason %d\n", (int)phase
, mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false", reason
);
/* Request the suspend; the result decides the skip flag and whether another phase is needed. */
282 switch (mono_thread_info_begin_suspend (info
, phase
)) {
283 case MONO_THREAD_BEGIN_SUSPEND_SUSPENDED
:
284 info
->client_info
.skip
= FALSE
;
286 case MONO_THREAD_BEGIN_SUSPEND_SKIP
:
287 info
->client_info
.skip
= TRUE
;
289 case MONO_THREAD_BEGIN_SUSPEND_NEXT_PHASE
:
290 need_next_phase
= TRUE
;
293 g_assert_not_reached ();
296 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND-%d] SUSPEND thread %p skip %s\n", (int)phase
, mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
297 } FOREACH_THREAD_END
;
/* The calling thread is marked done; then wait for the requests issued above. */
299 mono_thread_info_current ()->client_info
.suspend_done
= TRUE
;
300 mono_threads_wait_pending_operations ();
/* NOTE(review): the statement guarded by this check is not visible; presumably it leaves the phase loop. */
302 if (!need_next_phase
)
/* Restart pass: counts the threads that are pulse-resumed below. */
307 gint restart_counter
= 0;
309 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
313 if (info
->client_info
.suspend_done
|| !sgen_is_thread_in_current_stw (info
, &reason
)) {
314 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info
), info
->client_info
.suspend_done
, !sgen_is_thread_in_current_stw (info
, NULL
), reason
);
319 All threads that reach here are pristine suspended. This means the following:
321 - We haven't accepted the previous suspend as good.
322 - We haven't gave up on it for this STW (it's either bad or asked not to)
/* A thread stopped outside a critical location is accepted as suspended. */
324 if (!mono_thread_info_in_critical_location (info
)) {
325 info
->client_info
.suspend_done
= TRUE
;
327 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info
));
/* Any other thread must hold exactly one suspend request at this point. */
331 suspend_count
= mono_thread_info_suspend_count (info
);
332 if (!(suspend_count
== 1))
333 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info
), suspend_count
);
/* Pulse-resume with a new suspension request; skip is set when that call returns false. */
335 info
->client_info
.skip
= !mono_thread_info_begin_pulse_resume_and_request_suspension (info
);
336 if (!info
->client_info
.skip
)
337 restart_counter
+= 1;
339 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
342 mono_threads_wait_pending_operations ();
/* NOTE(review): the statement guarded by this check is not visible; presumably it ends the restart pass when nothing was restarted. */
344 if (restart_counter
== 0)
/*
 * Back off before suspending again: yield while sleep_duration is negative.
 * NOTE(review): the else branch and any reset of sleep_duration are not
 * visible; presumably the g_usleep and the += 10 belong to the else branch.
 */
347 if (sleep_duration
< 0) {
348 mono_thread_info_yield ();
351 g_usleep (sleep_duration
);
352 sleep_duration
+= 10;
/* Re-suspend pass: issue MOPUP-phase suspend requests to the threads that are running again. */
355 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
357 if (info
->client_info
.suspend_done
|| !sgen_is_thread_in_current_stw (info
, &reason
)) {
358 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info
), info
->client_info
.suspend_done
, !sgen_is_thread_in_current_stw (info
, NULL
), reason
);
362 if (!mono_thread_info_is_running (info
)) {
363 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info
));
367 switch (mono_thread_info_begin_suspend (info
, MONO_THREAD_SUSPEND_PHASE_MOPUP
)) {
368 case MONO_THREAD_BEGIN_SUSPEND_SUSPENDED
:
369 info
->client_info
.skip
= FALSE
;
371 case MONO_THREAD_BEGIN_SUSPEND_SKIP
:
372 info
->client_info
.skip
= TRUE
;
374 case MONO_THREAD_BEGIN_SUSPEND_NEXT_PHASE
:
375 g_assert_not_reached ();
377 g_assert_not_reached ();
380 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
383 mono_threads_wait_pending_operations ();
/* Final pass: record context and stack start for every thread that takes part in this stop-the-world. */
386 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
/* Among threads outside this stop-the-world, only the calling thread may have suspend_done set. */
390 if (!sgen_is_thread_in_current_stw (info
, &reason
)) {
391 g_assert (!info
->client_info
.suspend_done
|| info
== mono_thread_info_current ());
393 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info
), reason
);
397 g_assert (info
->client_info
.suspend_done
);
/* Copy the context saved in the thread's suspend state. */
399 info
->client_info
.ctx
= mono_thread_info_get_suspend_state (info
)->ctx
;
401 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
/* The stack start is the saved stack pointer lowered by REDZONE_SIZE. */
402 info
->client_info
.stack_start
= (gpointer
) ((char*)MONO_CONTEXT_GET_SP (&info
->client_info
.ctx
) - REDZONE_SIZE
);
/* A stack start outside [stack_start_limit, stack_end) is discarded by setting it to NULL. */
404 if (info
->client_info
.stack_start
< info
->client_info
.info
.stack_start_limit
405 || info
->client_info
.stack_start
>= info
->client_info
.info
.stack_end
) {
407 * Thread context is in unhandled state, most likely because it is
408 * dying. We don't scan it.
409 * FIXME We should probably rework and check the valid flag instead.
411 info
->client_info
.stack_start
= NULL
;
/* The instruction pointer is extracted only for the binary protocol entry and the debug trace. */
414 stopped_ip
= (gpointer
) (MONO_CONTEXT_GET_IP (&info
->client_info
.ctx
));
416 sgen_binary_protocol_thread_suspend ((gpointer
) mono_thread_info_get_tid (info
), stopped_ip
);
418 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
419 mono_thread_info_get_tid (info
), stopped_ip
, info
->client_info
.stack_start
, info
->client_info
.stack_start
? info
->client_info
.info
.stack_end
: NULL
);
/*
 * Resume handshake used by sgen_client_restart_world.
 *
 * Walks every thread not flagged MONO_THREAD_INFO_FLAGS_NO_GC. For threads
 * that sgen_is_thread_in_current_stw reports as part of the current
 * stop-the-world it begins the resume (asserting that
 * mono_thread_info_begin_resume succeeded) and logs the restart to the
 * binary protocol. Afterwards it waits for the pending thread operations
 * and ends the global suspend.
 *
 * NOTE(review): the declaration of reason and the else keyword in front of
 * the IGNORE trace are not visible in this view; presumably that trace is
 * the else branch for threads outside this stop-the-world.
 */
424 sgen_unified_suspend_restart_world (void)
426 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
427 FOREACH_THREAD_EXCLUDE (info
, MONO_THREAD_INFO_FLAGS_NO_GC
) {
429 if (sgen_is_thread_in_current_stw (info
, &reason
)) {
/* The resume call lives inside g_assert, so it relies on g_assert always evaluating its argument. */
430 g_assert (mono_thread_info_begin_resume (info
));
431 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info
));
433 sgen_binary_protocol_thread_restart ((gpointer
) mono_thread_info_get_tid (info
));
435 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info
), reason
);
/* Wait for the resume requests issued above before closing the global suspend. */
439 mono_threads_wait_pending_operations ();
440 mono_threads_end_global_suspend ();