2010-03-12 Jb Evain <jbevain@novell.com>
[mono.git] / libgc / pthread_stop_world.c
blob04eb3712c263d9831af128e93c174c61e813635c
1 #include "private/pthread_support.h"
3 #if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
4 && !defined(GC_IRIX_THREADS) && !defined(GC_WIN32_THREADS) \
5 && !defined(GC_DARWIN_THREADS) && !defined(GC_AIX_THREADS)
7 #include <signal.h>
8 #include <semaphore.h>
9 #include <errno.h>
10 #include <unistd.h>
12 /* work around a dlopen issue (bug #75390), undefs to avoid warnings with redefinitions */
13 #undef PACKAGE_BUGREPORT
14 #undef PACKAGE_NAME
15 #undef PACKAGE_STRING
16 #undef PACKAGE_TARNAME
17 #undef PACKAGE_VERSION
18 #include "mono/utils/mono-compiler.h"
20 #ifdef MONO_DEBUGGER_SUPPORTED
21 #include "include/libgc-mono-debugger.h"
22 #endif
#if DEBUG_THREADS

/* Provide NSIG if the system headers did not.  It is used below as the */
/* exclusive upper bound of the signal numbers that are tested.         */
#ifndef NSIG
# if defined(MAXSIG)
#  define NSIG (MAXSIG+1)
# elif defined(_NSIG)
#  define NSIG _NSIG
# elif defined(__SIGRTMAX)
#  define NSIG (__SIGRTMAX+1)
# else
#  error "NSIG is not defined and cannot be derived here; please fix it"
# endif
#endif

/* Debugging aid: print the numbers of all signals that are currently   */
/* blocked, as reported by pthread_sigmask().                           */
void GC_print_sig_mask()
{
    sigset_t blocked;
    int i;

    /* The new-mask argument is NULL, so the mask is only queried.      */
    if (pthread_sigmask(SIG_BLOCK, NULL, &blocked) != 0)
        ABORT("pthread_sigmask");
    GC_printf0("Blocked: ");
    for (i = 1; i < NSIG; i++) {
        if (sigismember(&blocked, i)) { GC_printf1("%ld ",(long) i); }
    }
    GC_printf0("\n");
}

#endif
54 /* Remove the signals that we want to allow in thread stopping */
55 /* handler from a set. */
56 void GC_remove_allowed_signals(sigset_t *set)
58 # ifdef NO_SIGNALS
59 if (sigdelset(set, SIGINT) != 0
60 || sigdelset(set, SIGQUIT) != 0
61 || sigdelset(set, SIGABRT) != 0
62 || sigdelset(set, SIGTERM) != 0) {
63 ABORT("sigdelset() failed");
65 # endif
67 # ifdef MPROTECT_VDB
68 /* Handlers write to the thread structure, which is in the heap, */
69 /* and hence can trigger a protection fault. */
70 if (sigdelset(set, SIGSEGV) != 0
71 # ifdef SIGBUS
72 || sigdelset(set, SIGBUS) != 0
73 # endif
74 ) {
75 ABORT("sigdelset() failed");
77 # endif
80 static sigset_t suspend_handler_mask;
82 word GC_stop_count; /* Incremented at the beginning of GC_stop_world. */
84 #ifdef GC_OSF1_THREADS
85 GC_bool GC_retry_signals = TRUE;
86 #else
87 GC_bool GC_retry_signals = FALSE;
88 #endif
/*
 * We use signals to stop threads during GC.
 *
 * Suspended threads wait in signal handler for SIG_THR_RESTART.
 * That's more portable than semaphores or condition variables.
 * (We do use sem_post from a signal handler, but that should be portable.)
 *
 * The thread suspension signal SIG_SUSPEND is now defined in gc_priv.h.
 * Note that we can't just stop a thread; we need it to save its stack
 * pointer(s) and acknowledge.
 */
/* Signal used to resume a suspended thread, unless it was already      */
/* defined elsewhere.  The expansions are parenthesized so the macro    */
/* behaves as a single operand wherever it is used.                     */
#ifndef SIG_THR_RESTART
#  if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS)
#    ifdef _SIGRTMIN
#      define SIG_THR_RESTART (_SIGRTMIN + 5)
#    else
#      define SIG_THR_RESTART (SIGRTMIN + 5)
#    endif
#  else
#   define SIG_THR_RESTART SIGXCPU
#  endif
#endif
/* Acknowledgement semaphore.  The suspend handler posts it once after  */
/* recording its stack pointer and once more after being restarted;     */
/* the stop-world and start-world code waits on it once per signalled   */
/* thread.  Initialized to 0 by pthread_stop_init().                    */
sem_t GC_suspend_ack_sem;
116 static void _GC_suspend_handler(int sig)
118 int dummy;
119 pthread_t my_thread = pthread_self();
120 GC_thread me;
121 # ifdef PARALLEL_MARK
122 word my_mark_no = GC_mark_no;
123 /* Marker can't proceed until we acknowledge. Thus this is */
124 /* guaranteed to be the mark_no correspending to our */
125 /* suspension, i.e. the marker can't have incremented it yet. */
126 # endif
127 word my_stop_count = GC_stop_count;
129 if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");
131 #if DEBUG_THREADS
132 GC_printf1("Suspending 0x%lx\n", my_thread);
133 #endif
135 me = GC_lookup_thread(my_thread);
136 /* The lookup here is safe, since I'm doing this on behalf */
137 /* of a thread which holds the allocation lock in order */
138 /* to stop the world. Thus concurrent modification of the */
139 /* data structure is impossible. */
140 if (me -> stop_info.last_stop_count == my_stop_count) {
141 /* Duplicate signal. OK if we are retrying. */
142 if (!GC_retry_signals) {
143 WARN("Duplicate suspend signal in thread %lx\n",
144 pthread_self());
146 return;
148 # ifdef SPARC
149 me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
150 # else
151 me -> stop_info.stack_ptr = (ptr_t)(&dummy);
152 # endif
153 # ifdef IA64
154 me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
155 # endif
157 /* Tell the thread that wants to stop the world that this */
158 /* thread has been stopped. Note that sem_post() is */
159 /* the only async-signal-safe primitive in LinuxThreads. */
160 sem_post(&GC_suspend_ack_sem);
161 me -> stop_info.last_stop_count = my_stop_count;
163 /* Wait until that thread tells us to restart by sending */
164 /* this thread a SIG_THR_RESTART signal. */
165 /* SIG_THR_RESTART should be masked at this point. Thus there */
166 /* is no race. */
167 do {
168 me->stop_info.signal = 0;
169 sigsuspend(&suspend_handler_mask); /* Wait for signal */
170 } while (me->stop_info.signal != SIG_THR_RESTART);
171 /* If the RESTART signal gets lost, we can still lose. That should be */
172 /* less likely than losing the SUSPEND signal, since we don't do much */
173 /* between the sem_post and sigsuspend. */
174 /* We'd need more handshaking to work around that, since we don't want */
175 /* to accidentally leave a RESTART signal pending, thus causing us to */
176 /* continue prematurely in a future round. */
178 /* Tell the thread that wants to start the world that this */
179 /* thread has been started. Note that sem_post() is */
180 /* the only async-signal-safe primitive in LinuxThreads. */
181 sem_post(&GC_suspend_ack_sem);
184 #if DEBUG_THREADS
185 GC_printf1("Continuing 0x%lx\n", my_thread);
186 #endif
/* Signal handler installed for SIG_SUSPEND by pthread_stop_init().     */
/* Wraps _GC_suspend_handler() and restores errno afterwards, so the    */
/* value seen after the handler returns is the one from before it ran.  */
void GC_suspend_handler(int sig)
{
    int old_errno = errno;

    _GC_suspend_handler(sig);
    errno = old_errno;
}
196 static void _GC_restart_handler(int sig)
198 pthread_t my_thread = pthread_self();
199 GC_thread me;
201 if (sig != SIG_THR_RESTART) ABORT("Bad signal in suspend_handler");
203 /* Let the GC_suspend_handler() know that we got a SIG_THR_RESTART. */
204 /* The lookup here is safe, since I'm doing this on behalf */
205 /* of a thread which holds the allocation lock in order */
206 /* to stop the world. Thus concurrent modification of the */
207 /* data structure is impossible. */
208 me = GC_lookup_thread(my_thread);
209 me->stop_info.signal = SIG_THR_RESTART;
212 ** Note: even if we didn't do anything useful here,
213 ** it would still be necessary to have a signal handler,
214 ** rather than ignoring the signals, otherwise
215 ** the signals will not be delivered at all, and
216 ** will thus not interrupt the sigsuspend() above.
219 #if DEBUG_THREADS
220 GC_printf1("In GC_restart_handler for 0x%lx\n", pthread_self());
221 #endif
224 # ifdef IA64
225 # define IF_IA64(x) x
226 # else
227 # define IF_IA64(x)
228 # endif
229 /* We hold allocation lock. Should do exactly the right thing if the */
230 /* world is stopped. Should not fail if it isn't. */
231 static void pthread_push_all_stacks()
233 GC_bool found_me = FALSE;
234 int i;
235 GC_thread p;
236 ptr_t lo, hi;
237 /* On IA64, we also need to scan the register backing store. */
238 IF_IA64(ptr_t bs_lo; ptr_t bs_hi;)
239 pthread_t me = pthread_self();
241 if (!GC_thr_initialized) GC_thr_init();
242 #if DEBUG_THREADS
243 GC_printf1("Pushing stacks from thread 0x%lx\n", (unsigned long) me);
244 #endif
245 for (i = 0; i < THREAD_TABLE_SZ; i++) {
246 for (p = GC_threads[i]; p != 0; p = p -> next) {
247 if (p -> flags & FINISHED) continue;
248 if (pthread_equal(p -> id, me)) {
249 # ifdef SPARC
250 lo = (ptr_t)GC_save_regs_in_stack();
251 # else
252 lo = GC_approx_sp();
253 # endif
254 found_me = TRUE;
255 IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();)
256 } else {
257 lo = p -> stop_info.stack_ptr;
258 IF_IA64(bs_hi = p -> backing_store_ptr;)
260 if ((p -> flags & MAIN_THREAD) == 0) {
261 hi = p -> stack_end;
262 IF_IA64(bs_lo = p -> backing_store_end);
263 } else {
264 /* The original stack. */
265 hi = GC_stackbottom;
266 IF_IA64(bs_lo = BACKING_STORE_BASE;)
268 #if DEBUG_THREADS
269 GC_printf3("Stack for thread 0x%lx = [%lx,%lx)\n",
270 (unsigned long) p -> id,
271 (unsigned long) lo, (unsigned long) hi);
272 #endif
273 if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n");
274 # ifdef STACK_GROWS_UP
275 /* We got them backwards! */
276 GC_push_all_stack(hi, lo);
277 # else
278 GC_push_all_stack(lo, hi);
279 # endif
280 # ifdef IA64
281 # if DEBUG_THREADS
282 GC_printf3("Reg stack for thread 0x%lx = [%lx,%lx)\n",
283 (unsigned long) p -> id,
284 (unsigned long) bs_lo, (unsigned long) bs_hi);
285 # endif
286 if (pthread_equal(p -> id, me)) {
287 GC_push_all_eager(bs_lo, bs_hi);
288 } else {
289 GC_push_all_stack(bs_lo, bs_hi);
291 # endif
294 if (!found_me && !GC_in_thread_creation)
295 ABORT("Collecting from unknown thread.");
/* Signal handler installed for SIG_THR_RESTART by pthread_stop_init(). */
/* Wraps _GC_restart_handler() and restores errno afterwards.           */
void GC_restart_handler(int sig)
{
    int old_errno = errno;

    _GC_restart_handler (sig);
    errno = old_errno;
}
/* We hold allocation lock.  Should do exactly the right thing if the   */
/* world is stopped.  Should not fail if it isn't.                      */
/* Public entry point; simply forwards to pthread_push_all_stacks().    */
void GC_push_all_stacks()
{
    pthread_push_all_stacks();
}
/* There seems to be a very rare thread stopping problem.  To help us  */
/* debug that, we save the ids of the stopping thread.                 */
/* Both are written by GC_suspend_all(); GC_stopping_thread is cleared */
/* again at the end of pthread_stop_world().                           */
pthread_t GC_stopping_thread;
int GC_stopping_pid;
317 /* We hold the allocation lock. Suspend all threads that might */
318 /* still be running. Return the number of suspend signals that */
319 /* were sent. */
320 int GC_suspend_all()
322 int n_live_threads = 0;
323 int i;
324 GC_thread p;
325 int result;
326 pthread_t my_thread = pthread_self();
328 GC_stopping_thread = my_thread; /* debugging only. */
329 GC_stopping_pid = getpid(); /* debugging only. */
330 for (i = 0; i < THREAD_TABLE_SZ; i++) {
331 for (p = GC_threads[i]; p != 0; p = p -> next) {
332 if (p -> id != my_thread) {
333 if (p -> flags & FINISHED) continue;
334 if (p -> stop_info.last_stop_count == GC_stop_count) continue;
335 if (p -> thread_blocked) /* Will wait */ continue;
336 n_live_threads++;
337 #if DEBUG_THREADS
338 GC_printf1("Sending suspend signal to 0x%lx\n", p -> id);
339 #endif
341 result = pthread_kill(p -> id, SIG_SUSPEND);
342 switch(result) {
343 case ESRCH:
344 /* Not really there anymore. Possible? */
345 n_live_threads--;
346 break;
347 case 0:
348 break;
349 default:
350 ABORT("pthread_kill failed");
355 return n_live_threads;
358 /* Caller holds allocation lock. */
359 static void pthread_stop_world()
361 int i;
362 int n_live_threads;
363 int code;
365 #if DEBUG_THREADS
366 GC_printf1("Stopping the world from 0x%lx\n", pthread_self());
367 #endif
369 n_live_threads = GC_suspend_all();
371 if (GC_retry_signals) {
372 unsigned long wait_usecs = 0; /* Total wait since retry. */
373 # define WAIT_UNIT 3000
374 # define RETRY_INTERVAL 100000
375 for (;;) {
376 int ack_count;
378 sem_getvalue(&GC_suspend_ack_sem, &ack_count);
379 if (ack_count == n_live_threads) break;
380 if (wait_usecs > RETRY_INTERVAL) {
381 int newly_sent = GC_suspend_all();
383 # ifdef CONDPRINT
384 if (GC_print_stats) {
385 GC_printf1("Resent %ld signals after timeout\n",
386 newly_sent);
388 # endif
389 sem_getvalue(&GC_suspend_ack_sem, &ack_count);
390 if (newly_sent < n_live_threads - ack_count) {
391 WARN("Lost some threads during GC_stop_world?!\n",0);
392 n_live_threads = ack_count + newly_sent;
394 wait_usecs = 0;
396 usleep(WAIT_UNIT);
397 wait_usecs += WAIT_UNIT;
400 for (i = 0; i < n_live_threads; i++) {
401 while (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
402 if (errno != EINTR) {
403 GC_err_printf1("Sem_wait returned %ld\n", (unsigned long)code);
404 ABORT("sem_wait for handler failed");
408 #if DEBUG_THREADS
409 GC_printf1("World stopped from 0x%lx\n", pthread_self());
410 #endif
411 GC_stopping_thread = 0; /* debugging only */
414 /* Caller holds allocation lock. */
415 void GC_stop_world()
417 if (GC_notify_event)
418 GC_notify_event (GC_EVENT_PRE_STOP_WORLD);
419 /* Make sure all free list construction has stopped before we start. */
420 /* No new construction can start, since free list construction is */
421 /* required to acquire and release the GC lock before it starts, */
422 /* and we have the lock. */
423 # ifdef PARALLEL_MARK
424 GC_acquire_mark_lock();
425 GC_ASSERT(GC_fl_builder_count == 0);
426 /* We should have previously waited for it to become zero. */
427 # endif /* PARALLEL_MARK */
428 ++GC_stop_count;
429 #ifdef MONO_DEBUGGER_SUPPORTED
430 if (gc_thread_vtable && gc_thread_vtable->stop_world)
431 gc_thread_vtable->stop_world ();
432 else
433 #endif
434 pthread_stop_world ();
435 # ifdef PARALLEL_MARK
436 GC_release_mark_lock();
437 # endif
438 if (GC_notify_event)
439 GC_notify_event (GC_EVENT_POST_STOP_WORLD);
442 /* Caller holds allocation lock, and has held it continuously since */
443 /* the world stopped. */
444 static void pthread_start_world()
446 pthread_t my_thread = pthread_self();
447 register int i;
448 register GC_thread p;
449 register int n_live_threads = 0;
450 register int result;
451 int code;
453 # if DEBUG_THREADS
454 GC_printf0("World starting\n");
455 # endif
456 if (GC_notify_event)
457 GC_notify_event (GC_EVENT_PRE_START_WORLD);
459 for (i = 0; i < THREAD_TABLE_SZ; i++) {
460 for (p = GC_threads[i]; p != 0; p = p -> next) {
461 if (p -> id != my_thread) {
462 if (p -> flags & FINISHED) continue;
463 if (p -> thread_blocked) continue;
464 n_live_threads++;
465 #if DEBUG_THREADS
466 GC_printf1("Sending restart signal to 0x%lx\n", p -> id);
467 #endif
469 result = pthread_kill(p -> id, SIG_THR_RESTART);
470 switch(result) {
471 case ESRCH:
472 /* Not really there anymore. Possible? */
473 n_live_threads--;
474 break;
475 case 0:
476 break;
477 default:
478 ABORT("pthread_kill failed");
484 #if DEBUG_THREADS
485 GC_printf0 ("All threads signaled");
486 #endif
488 for (i = 0; i < n_live_threads; i++) {
489 while (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
490 if (errno != EINTR) {
491 GC_err_printf1("Sem_wait returned %ld\n", (unsigned long)code);
492 ABORT("sem_wait for handler failed");
497 if (GC_notify_event)
498 GC_notify_event (GC_EVENT_POST_START_WORLD);
499 #if DEBUG_THREADS
500 GC_printf0("World started\n");
501 #endif
/* Restart the world, either through the start_world hook of            */
/* gc_thread_vtable (when MONO_DEBUGGER_SUPPORTED is defined and the    */
/* hook is set) or through pthread_start_world().                       */
void GC_start_world()
{
#ifdef MONO_DEBUGGER_SUPPORTED
    if (gc_thread_vtable && gc_thread_vtable->start_world)
        gc_thread_vtable->start_world();
    else
#endif
        pthread_start_world ();
}
514 static void pthread_stop_init() {
515 struct sigaction act;
517 if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0)
518 ABORT("sem_init failed");
520 act.sa_flags = SA_RESTART;
521 if (sigfillset(&act.sa_mask) != 0) {
522 ABORT("sigfillset() failed");
524 GC_remove_allowed_signals(&act.sa_mask);
525 /* SIG_THR_RESTART is set in the resulting mask. */
526 /* It is unmasked by the handler when necessary. */
527 act.sa_handler = GC_suspend_handler;
528 if (sigaction(SIG_SUSPEND, &act, NULL) != 0) {
529 ABORT("Cannot set SIG_SUSPEND handler");
532 act.sa_handler = GC_restart_handler;
533 if (sigaction(SIG_THR_RESTART, &act, NULL) != 0) {
534 ABORT("Cannot set SIG_THR_RESTART handler");
537 /* Inititialize suspend_handler_mask. It excludes SIG_THR_RESTART. */
538 if (sigfillset(&suspend_handler_mask) != 0) ABORT("sigfillset() failed");
539 GC_remove_allowed_signals(&suspend_handler_mask);
540 if (sigdelset(&suspend_handler_mask, SIG_THR_RESTART) != 0)
541 ABORT("sigdelset() failed");
543 /* Check for GC_RETRY_SIGNALS. */
544 if (0 != GETENV("GC_RETRY_SIGNALS")) {
545 GC_retry_signals = TRUE;
547 if (0 != GETENV("GC_NO_RETRY_SIGNALS")) {
548 GC_retry_signals = FALSE;
550 # ifdef CONDPRINT
551 if (GC_print_stats && GC_retry_signals) {
552 GC_printf0("Will retry suspend signal if necessary.\n");
554 # endif
/* We hold the allocation lock. */
/* Initializes the stop-world machinery, either through the initialize  */
/* hook of gc_thread_vtable (when MONO_DEBUGGER_SUPPORTED is defined    */
/* and the hook is set) or through pthread_stop_init().                 */
void GC_stop_init()
{
#ifdef MONO_DEBUGGER_SUPPORTED
    if (gc_thread_vtable && gc_thread_vtable->initialize)
        gc_thread_vtable->initialize ();
    else
#endif
        pthread_stop_init ();
}
#ifdef MONO_DEBUGGER_SUPPORTED

/* Optional table of hooks.  When it and the relevant member are       */
/* non-NULL, GC_stop_world(), GC_start_world() and GC_stop_init() call  */
/* the hook instead of the pthread-based implementation in this file.   */
GCThreadFunctions *gc_thread_vtable = NULL;

/* Return the address of the calling thread's saved stack pointer slot  */
/* (stop_info.stack_ptr) in its GC thread structure.                    */
void *
GC_mono_debugger_get_stack_ptr (void)
{
    GC_thread me;

    me = GC_lookup_thread (pthread_self ());
    return &me->stop_info.stack_ptr;
}

#endif
583 #endif