Merge pull request #2202 from mono/revert-2090-mono-4.2.0-branch-bug25480
[mono-project.git] / mono / metadata / sgen-stw.c
blobbc899f5d59c2fe1ebfda9eadfa9e99e890112ff9
1 /*
2 * sgen-stw.c: Stop the world functionality
4 * Author:
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Library General Public
15 * License 2.0 as published by the Free Software Foundation;
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License 2.0 along with this library; if not, write to the Free
24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include "config.h"
28 #ifdef HAVE_SGEN_GC
30 #include "sgen/sgen-gc.h"
31 #include "sgen/sgen-protocol.h"
32 #include "sgen/sgen-memory-governor.h"
33 #include "sgen/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "sgen/sgen-client.h"
36 #include "metadata/sgen-bridge-internal.h"
37 #include "metadata/gc-internal.h"
39 #define TV_DECLARE SGEN_TV_DECLARE
40 #define TV_GETTIME SGEN_TV_GETTIME
41 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations of the unified-suspend helpers defined near the end of this file; they are called from sgen_client_restart_world () and sgen_client_stop_world () respectively. */
43 static void sgen_unified_suspend_restart_world (void);
44 static void sgen_unified_suspend_stop_world (void);
/* Incremented once by each call to sgen_client_stop_world (), which also prints it in its log line. */
46 unsigned int sgen_global_stop_count = 0;
48 inline static void*
49 align_pointer (void *ptr)
51 mword p = (mword)ptr;
52 p += sizeof (gpointer) - 1;
53 p &= ~ (sizeof (gpointer) - 1);
54 return (void*)p;
/*
 * File-scope scratch storage that update_current_thread_stack () fills with the
 * calling thread's register state before copying it into the thread info:
 * a MonoContext when USE_MONO_CTX is defined, a raw register array otherwise.
 */
57 #ifdef USE_MONO_CTX
58 static MonoContext cur_thread_ctx;
59 #else
60 static mword cur_thread_regs [ARCH_NUM_REGS];
61 #endif
/*
 * update_current_thread_stack:
 *
 *   Record the calling thread's current stack position and register state in
 * its SgenThreadInfo (stack_start plus ctx or regs), then hand that state to the
 * runtime's thread_suspend_func callback if one is installed.
 * @start is not referenced by the body.
 */
63 static void
64 update_current_thread_stack (void *start)
/* The address of this local, rounded up by align_pointer (), is what gets stored as stack_start. */
66 int stack_guard = 0;
67 #if !defined(USE_MONO_CTX)
68 void *reg_ptr = cur_thread_regs;
69 #endif
70 SgenThreadInfo *info = mono_thread_info_current ();
72 info->client_info.stack_start = align_pointer (&stack_guard);
/* Sanity check: the recorded address must lie in [stack_start_limit, stack_end). */
73 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
74 #ifdef USE_MONO_CTX
/* Capture the context into the file-scope buffer, then copy it into the thread info. */
75 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
76 memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
/* The callback receives the copied context as its last argument. */
77 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
79 #else
/* Store the registers into the file-scope array, then copy them into the thread info. */
80 ARCH_STORE_REGS (reg_ptr);
81 memcpy (&info->client_info.regs, reg_ptr, sizeof (info->client_info.regs));
/* Without a MonoContext the callback is invoked with a NULL context argument. */
82 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
83 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, NULL);
84 #endif
87 static gboolean
88 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
90 MonoJitInfo *ji;
92 if (!mono_thread_internal_current ())
93 /* Happens during thread attach */
94 return FALSE;
96 if (!ip || !domain)
97 return FALSE;
98 if (!sgen_has_critical_method ())
99 return FALSE;
102 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
103 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
104 * to register the jit info for all GC critical methods after they are JITted/loaded.
106 ji = mono_jit_info_table_find_internal (domain, ip, FALSE, FALSE);
107 if (!ji)
108 return FALSE;
110 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 *   Repeatedly resume and then re-suspend every thread that was stopped at a
 * point this function does not accept: no stack_start recorded, one of the
 * critical-region flags set, or stopped_ip inside a critical method according
 * to is_ip_in_managed_allocator (). The loop ends when a pass resumes no thread.
 *
 * Returns the accumulated difference between the number of threads resumed and
 * the number successfully suspended again, which the code treats as the number
 * of threads that died in the meantime.
 */
113 static int
114 restart_threads_until_none_in_managed_allocator (void)
116 SgenThreadInfo *info;
117 int num_threads_died = 0;
/* -1 marks the first round, which only yields instead of sleeping. */
118 int sleep_duration = -1;
120 for (;;) {
121 int restart_count = 0, restarted_count = 0;
122 /* restart all threads that stopped in the
123 allocator */
124 FOREACH_THREAD_SAFE (info) {
125 gboolean result;
/* Threads flagged skip, gc_disabled, or already accepted (suspend_done) are left alone. */
126 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
127 continue;
128 if (mono_thread_info_is_live (info) &&
129 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
130 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
131 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
132 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
133 result = sgen_resume_thread (info);
/* A failed resume marks the thread as skipped for the remainder of this function. */
134 if (result) {
135 ++restart_count;
136 } else {
137 info->client_info.skip = 1;
139 } else {
140 /* we set the stopped_ip to
141 NULL for threads which
142 we're not restarting so
143 that we can easily identify
144 the others */
145 info->client_info.stopped_ip = NULL;
146 info->client_info.stopped_domain = NULL;
147 info->client_info.suspend_done = TRUE;
149 } END_FOREACH_THREAD_SAFE
150 /* if no threads were restarted, we're done */
151 if (restart_count == 0)
152 break;
154 /* wait for the threads to signal their restart */
155 sgen_wait_for_suspend_ack (restart_count);
/* Back off before suspending again: yield on the first round, afterwards call g_usleep () with a value that starts at 0 and grows by 10 each round. */
157 if (sleep_duration < 0) {
158 mono_thread_info_yield ();
159 sleep_duration = 0;
160 } else {
161 g_usleep (sleep_duration);
162 sleep_duration += 10;
165 /* stop them again */
166 FOREACH_THREAD (info) {
167 gboolean result;
/* Only threads resumed above still have a non-NULL stopped_ip at this point (it was cleared for the ones that were accepted). */
168 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
169 continue;
170 result = sgen_suspend_thread (info);
172 if (result) {
173 ++restarted_count;
174 } else {
175 info->client_info.skip = 1;
177 } END_FOREACH_THREAD
178 /* some threads might have died */
179 num_threads_died += restart_count - restarted_count;
180 /* wait for the threads to signal their suspension
181 again */
182 sgen_wait_for_suspend_ack (restarted_count);
185 return num_threads_died;
188 static void
189 acquire_gc_locks (void)
191 LOCK_INTERRUPTION;
192 mono_thread_info_suspend_lock ();
195 static void
196 release_gc_locks (void)
198 mono_thread_info_suspend_unlock ();
199 UNLOCK_INTERRUPTION;
/* Timestamp taken in sgen_client_stop_world () right before threads are suspended; sgen_client_restart_world () reads it to compute the pause length. */
202 static TV_DECLARE (stop_world_time);
/* Largest pause value computed so far by sgen_client_restart_world (). */
203 static unsigned long max_pause_usec = 0;
/* Accumulated elapsed time of the stop and restart handshakes; both are registered as counters in mono_sgen_init_stw (). */
205 static guint64 time_stop_world;
206 static guint64 time_restart_world;
208 /* LOCKING: assumes the GC lock is held */
209 void
210 sgen_client_stop_world (int generation)
212 TV_DECLARE (end_handshake);
214 /* notify the profiler of the leftovers */
215 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
216 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
217 mono_sgen_gc_event_moves ();
219 acquire_gc_locks ();
221 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
222 sgen_process_togglerefs ();
224 update_current_thread_stack (&generation);
226 sgen_global_stop_count++;
227 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
228 TV_GETTIME (stop_world_time);
230 if (mono_thread_info_unified_management_enabled ()) {
231 sgen_unified_suspend_stop_world ();
232 } else {
233 int count, dead;
234 count = sgen_thread_handshake (TRUE);
235 dead = restart_threads_until_none_in_managed_allocator ();
236 if (count < dead)
237 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
240 SGEN_LOG (3, "world stopped");
242 TV_GETTIME (end_handshake);
243 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
245 sgen_memgov_collection_start (generation);
246 if (sgen_need_bridge_processing ())
247 sgen_bridge_reset_data ();
250 /* LOCKING: assumes the GC lock is held */
251 void
252 sgen_client_restart_world (int generation, GGTimingInfo *timing)
254 SgenThreadInfo *info;
255 TV_DECLARE (end_sw);
256 TV_DECLARE (start_handshake);
257 TV_DECLARE (end_bridge);
258 unsigned long usec, bridge_usec;
260 /* notify the profiler of the leftovers */
261 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
262 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
263 mono_sgen_gc_event_moves ();
265 FOREACH_THREAD (info) {
266 info->client_info.stack_start = NULL;
267 #ifdef USE_MONO_CTX
268 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
269 #else
270 memset (&info->client_info.regs, 0, sizeof (info->client_info.regs));
271 #endif
272 } END_FOREACH_THREAD
274 TV_GETTIME (start_handshake);
276 if (mono_thread_info_unified_management_enabled ())
277 sgen_unified_suspend_restart_world ();
278 else
279 sgen_thread_handshake (FALSE);
281 TV_GETTIME (end_sw);
282 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
283 usec = TV_ELAPSED (stop_world_time, end_sw);
284 max_pause_usec = MAX (usec, max_pause_usec);
286 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
289 * We must release the thread info suspend lock after doing
290 * the thread handshake. Otherwise, if the GC stops the world
291 * and a thread is in the process of starting up, but has not
292 * yet registered (it's not in the thread_list), it is
293 * possible that the thread does register while the world is
294 * stopped. When restarting the GC will then try to restart
295 * said thread, but since it never got the suspend signal, it
296 * cannot answer the restart signal, so a deadlock results.
298 release_gc_locks ();
300 TV_GETTIME (end_bridge);
301 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
303 if (timing) {
304 timing [0].stw_time = usec;
305 timing [0].bridge_time = bridge_usec;
309 void
310 mono_sgen_init_stw (void)
312 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
313 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
316 /* Unified suspend code */
318 static gboolean
319 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
322 A thread explicitly asked to be skiped because it holds no managed state.
323 This is used by TP and finalizer threads.
324 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
326 if (info->client_info.gc_disabled) {
327 return FALSE;
331 We have detected that this thread is failing/dying, ignore it.
332 FIXME: can't we merge this with thread_is_dying?
334 if (info->client_info.skip) {
335 return FALSE;
339 Suspending the current thread will deadlock us, bad idea.
341 if (info == mono_thread_info_current ()) {
342 return FALSE;
346 We can't suspend the workers that will do all the heavy lifting.
347 FIXME Use some state bit in SgenThreadInfo for this.
349 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
350 return FALSE;
354 The thread has signaled that it started to detach, ignore it.
355 FIXME: can't we merge this with skip
357 if (!mono_thread_info_is_live (info)) {
358 return FALSE;
361 return TRUE;
364 static void
365 update_sgen_info (SgenThreadInfo *info)
367 char *stack_start;
369 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
370 info->client_info.stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
371 info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
372 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
374 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
375 if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
376 g_error ("BAD STACK");
378 info->client_info.stack_start = stack_start;
379 #ifdef USE_MONO_CTX
380 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
381 #else
382 g_assert_not_reached ();
383 #endif
386 static void
387 sgen_unified_suspend_stop_world (void)
389 int restart_counter;
390 SgenThreadInfo *info;
391 int sleep_duration = -1;
393 mono_threads_begin_global_suspend ();
394 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
396 FOREACH_THREAD_SAFE (info) {
397 info->client_info.skip = FALSE;
398 info->client_info.suspend_done = FALSE;
399 if (sgen_is_thread_in_current_stw (info)) {
400 info->client_info.skip = !mono_thread_info_begin_suspend (info, FALSE);
401 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
402 } else {
403 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
405 } END_FOREACH_THREAD_SAFE
407 mono_thread_info_current ()->client_info.suspend_done = TRUE;
408 mono_threads_wait_pending_operations ();
410 for (;;) {
411 restart_counter = 0;
412 FOREACH_THREAD_SAFE (info) {
413 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
414 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
415 continue;
419 All threads that reach here are pristine suspended. This means the following:
421 - We haven't accepted the previous suspend as good.
422 - We haven't gave up on it for this STW (it's either bad or asked not to)
424 if (!mono_threads_core_check_suspend_result (info)) {
425 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
426 info->client_info.skip = TRUE;
427 } else if (mono_thread_info_in_critical_location (info)) {
428 gboolean res;
429 g_assert (mono_thread_info_suspend_count (info) == 1);
430 res = mono_thread_info_begin_resume (info);
431 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
432 if (res)
433 ++restart_counter;
434 else
435 info->client_info.skip = TRUE;
436 } else {
437 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
438 g_assert (!info->client_info.in_critical_region);
439 info->client_info.suspend_done = TRUE;
441 } END_FOREACH_THREAD_SAFE
443 if (restart_counter == 0)
444 break;
445 mono_threads_wait_pending_operations ();
447 if (sleep_duration < 0) {
448 #ifdef HOST_WIN32
449 SwitchToThread ();
450 #else
451 sched_yield ();
452 #endif
453 sleep_duration = 0;
454 } else {
455 g_usleep (sleep_duration);
456 sleep_duration += 10;
459 FOREACH_THREAD_SAFE (info) {
460 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
461 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
462 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
463 if (!res)
464 info->client_info.skip = TRUE;
466 } END_FOREACH_THREAD_SAFE
468 mono_threads_wait_pending_operations ();
471 FOREACH_THREAD_SAFE (info) {
472 if (sgen_is_thread_in_current_stw (info)) {
473 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
474 g_assert (info->client_info.suspend_done);
475 update_sgen_info (info);
476 } else {
477 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
479 } END_FOREACH_THREAD_SAFE
482 static void
483 sgen_unified_suspend_restart_world (void)
485 SgenThreadInfo *info;
487 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
488 FOREACH_THREAD_SAFE (info) {
489 if (sgen_is_thread_in_current_stw (info)) {
490 g_assert (mono_thread_info_begin_resume (info));
491 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
492 } else {
493 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
495 } END_FOREACH_THREAD_SAFE
497 mono_threads_wait_pending_operations ();
498 mono_threads_end_global_suspend ();
500 #endif