/*
 * sgen-stw.c: Stop the world functionality
 *
 * Authors:
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28 #include "utils/mono-threads-debug.h"
30 #define TV_DECLARE SGEN_TV_DECLARE
31 #define TV_GETTIME SGEN_TV_GETTIME
32 #define TV_ELAPSED SGEN_TV_ELAPSED
34 static void sgen_unified_suspend_restart_world (void);
35 static void sgen_unified_suspend_stop_world (void);
37 static TV_DECLARE (end_of_last_stw
);
39 guint64
mono_time_since_last_stw ()
41 if (end_of_last_stw
== 0)
44 TV_DECLARE (current_time
);
45 TV_GETTIME (current_time
);
46 return TV_ELAPSED (end_of_last_stw
, current_time
);
/* Number of stop-the-world requests so far; bumped once per sgen_client_stop_world () call. */
unsigned int sgen_global_stop_count = 0;
52 align_pointer (void *ptr
)
55 p
+= sizeof (gpointer
) - 1;
56 p
&= ~ (sizeof (gpointer
) - 1);
61 update_current_thread_stack (void *start
)
64 SgenThreadInfo
*info
= mono_thread_info_current ();
66 info
->client_info
.stack_start
= align_pointer (&stack_guard
);
67 g_assert (info
->client_info
.stack_start
);
68 g_assert (info
->client_info
.stack_start
>= info
->client_info
.stack_start_limit
&& info
->client_info
.stack_start
< info
->client_info
.stack_end
);
70 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
71 MONO_CONTEXT_GET_CURRENT (info
->client_info
.ctx
);
73 g_error ("Sgen STW requires a working mono-context");
76 if (mono_gc_get_gc_callbacks ()->thread_suspend_func
)
77 mono_gc_get_gc_callbacks ()->thread_suspend_func (info
->client_info
.runtime_data
, NULL
, &info
->client_info
.ctx
);
81 acquire_gc_locks (void)
84 mono_thread_info_suspend_lock ();
88 release_gc_locks (void)
90 mono_thread_info_suspend_unlock ();
94 static TV_DECLARE (stop_world_time
);
95 static unsigned long max_pause_usec
= 0;
97 static guint64 time_stop_world
;
98 static guint64 time_restart_world
;
100 /* LOCKING: assumes the GC lock is held */
102 sgen_client_stop_world (int generation
)
104 TV_DECLARE (end_handshake
);
106 /* notify the profiler of the leftovers */
107 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
108 if (G_UNLIKELY (mono_profiler_events
& MONO_PROFILE_GC_MOVES
))
109 mono_sgen_gc_event_moves ();
113 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED
, generation
);
115 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
116 sgen_process_togglerefs ();
118 update_current_thread_stack (&generation
);
120 sgen_global_stop_count
++;
121 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count
, mono_thread_info_current (), (gpointer
) (gsize
) mono_native_thread_id_get ());
122 TV_GETTIME (stop_world_time
);
124 sgen_unified_suspend_stop_world ();
126 SGEN_LOG (3, "world stopped");
128 TV_GETTIME (end_handshake
);
129 time_stop_world
+= TV_ELAPSED (stop_world_time
, end_handshake
);
131 sgen_memgov_collection_start (generation
);
132 if (sgen_need_bridge_processing ())
133 sgen_bridge_reset_data ();
136 /* LOCKING: assumes the GC lock is held */
138 sgen_client_restart_world (int generation
, gint64
*stw_time
)
141 TV_DECLARE (start_handshake
);
144 /* notify the profiler of the leftovers */
145 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
146 if (G_UNLIKELY (mono_profiler_events
& MONO_PROFILE_GC_MOVES
))
147 mono_sgen_gc_event_moves ();
149 FOREACH_THREAD (info
) {
150 info
->client_info
.stack_start
= NULL
;
151 memset (&info
->client_info
.ctx
, 0, sizeof (MonoContext
));
154 TV_GETTIME (start_handshake
);
156 sgen_unified_suspend_restart_world ();
159 time_restart_world
+= TV_ELAPSED (start_handshake
, end_sw
);
160 usec
= TV_ELAPSED (stop_world_time
, end_sw
);
161 max_pause_usec
= MAX (usec
, max_pause_usec
);
162 end_of_last_stw
= end_sw
;
164 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec
, (int)max_pause_usec
);
167 * We must release the thread info suspend lock after doing
168 * the thread handshake. Otherwise, if the GC stops the world
169 * and a thread is in the process of starting up, but has not
170 * yet registered (it's not in the thread_list), it is
171 * possible that the thread does register while the world is
172 * stopped. When restarting the GC will then try to restart
173 * said thread, but since it never got the suspend signal, it
174 * cannot answer the restart signal, so a deadlock results.
178 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED
, generation
);
184 mono_sgen_init_stw (void)
186 mono_counters_register ("World stop", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_stop_world
);
187 mono_counters_register ("World restart", MONO_COUNTER_GC
| MONO_COUNTER_ULONG
| MONO_COUNTER_TIME
, &time_restart_world
);
190 /* Unified suspend code */
193 sgen_is_thread_in_current_stw (SgenThreadInfo
*info
, int *reason
)
196 A thread explicitly asked to be skiped because it holds no managed state.
197 This is used by TP and finalizer threads.
198 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
200 if (info
->client_info
.gc_disabled
) {
207 We have detected that this thread is failing/dying, ignore it.
208 FIXME: can't we merge this with thread_is_dying?
210 if (info
->client_info
.skip
) {
217 Suspending the current thread will deadlock us, bad idea.
219 if (info
== mono_thread_info_current ()) {
226 We can't suspend the workers that will do all the heavy lifting.
227 FIXME Use some state bit in SgenThreadInfo for this.
229 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info
))) {
236 The thread has signaled that it started to detach, ignore it.
237 FIXME: can't we merge this with skip
239 if (!mono_thread_info_is_live (info
)) {
249 sgen_unified_suspend_stop_world (void)
251 int sleep_duration
= -1;
253 mono_threads_begin_global_suspend ();
254 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
256 FOREACH_THREAD (info
) {
257 info
->client_info
.skip
= FALSE
;
258 info
->client_info
.suspend_done
= FALSE
;
261 if (!sgen_is_thread_in_current_stw (info
, &reason
)) {
262 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false", reason
);
266 info
->client_info
.skip
= !mono_thread_info_begin_suspend (info
);
268 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
271 mono_thread_info_current ()->client_info
.suspend_done
= TRUE
;
272 mono_threads_wait_pending_operations ();
275 gint restart_counter
= 0;
277 FOREACH_THREAD (info
) {
281 if (info
->client_info
.suspend_done
|| !sgen_is_thread_in_current_stw (info
, &reason
)) {
282 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info
), info
->client_info
.suspend_done
, !sgen_is_thread_in_current_stw (info
, NULL
), reason
);
287 All threads that reach here are pristine suspended. This means the following:
289 - We haven't accepted the previous suspend as good.
290 - We haven't gave up on it for this STW (it's either bad or asked not to)
292 if (!mono_thread_info_in_critical_location (info
)) {
293 info
->client_info
.suspend_done
= TRUE
;
295 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info
));
299 suspend_count
= mono_thread_info_suspend_count (info
);
300 if (!(suspend_count
== 1))
301 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info
), suspend_count
);
303 info
->client_info
.skip
= !mono_thread_info_begin_resume (info
);
304 if (!info
->client_info
.skip
)
305 restart_counter
+= 1;
307 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
310 mono_threads_wait_pending_operations ();
312 if (restart_counter
== 0)
315 if (sleep_duration
< 0) {
316 mono_thread_info_yield ();
319 g_usleep (sleep_duration
);
320 sleep_duration
+= 10;
323 FOREACH_THREAD (info
) {
325 if (info
->client_info
.suspend_done
|| !sgen_is_thread_in_current_stw (info
, &reason
)) {
326 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info
), info
->client_info
.suspend_done
, !sgen_is_thread_in_current_stw (info
, NULL
), reason
);
330 if (!mono_thread_info_is_running (info
)) {
331 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info
));
335 info
->client_info
.skip
= !mono_thread_info_begin_suspend (info
);
337 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info
), info
->client_info
.skip
? "true" : "false");
340 mono_threads_wait_pending_operations ();
343 FOREACH_THREAD (info
) {
347 if (!sgen_is_thread_in_current_stw (info
, &reason
)) {
348 g_assert (!info
->client_info
.suspend_done
|| info
== mono_thread_info_current ());
350 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info
), reason
);
354 g_assert (info
->client_info
.suspend_done
);
356 info
->client_info
.ctx
= mono_thread_info_get_suspend_state (info
)->ctx
;
358 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
359 info
->client_info
.stack_start
= (gpointer
) ((char*)MONO_CONTEXT_GET_SP (&info
->client_info
.ctx
) - REDZONE_SIZE
);
361 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
362 if (!info
->client_info
.stack_start
363 || info
->client_info
.stack_start
< info
->client_info
.stack_start_limit
364 || info
->client_info
.stack_start
>= info
->client_info
.stack_end
) {
365 g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
366 info
->client_info
.stack_start
, info
->client_info
.stack_start_limit
, info
->client_info
.stack_end
);
369 stopped_ip
= (gpointer
) (MONO_CONTEXT_GET_IP (&info
->client_info
.ctx
));
371 binary_protocol_thread_suspend ((gpointer
) mono_thread_info_get_tid (info
), stopped_ip
);
373 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
374 mono_thread_info_get_tid (info
), stopped_ip
, info
->client_info
.stack_start
, info
->client_info
.stack_start
? info
->client_info
.stack_end
: NULL
);
379 sgen_unified_suspend_restart_world (void)
381 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
382 FOREACH_THREAD (info
) {
384 if (sgen_is_thread_in_current_stw (info
, &reason
)) {
385 g_assert (mono_thread_info_begin_resume (info
));
386 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info
));
388 binary_protocol_thread_restart ((gpointer
) mono_thread_info_get_tid (info
));
390 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info
), reason
);
394 mono_threads_wait_pending_operations ();
395 mono_threads_end_global_suspend ();