Add man page for Gonzalo's new tool. It needs a better example, and better explanations
[mono-project.git] / libgc / pthread_stop_world.c
blob39e8d5d8eff33bca7037472344e25f8020c55ac7
1 #include "private/pthread_support.h"
3 #if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
4 && !defined(GC_IRIX_THREADS) && !defined(GC_WIN32_THREADS) \
5 && !defined(GC_DARWIN_THREADS) && !defined(GC_AIX_THREADS)
7 #include <signal.h>
8 #include <semaphore.h>
9 #include <errno.h>
10 #include <unistd.h>
12 /* work around a dlopen issue (bug #75390), undefs to avoid warnings with redefinitions */
13 #undef PACKAGE_BUGREPORT
14 #undef PACKAGE_NAME
15 #undef PACKAGE_STRING
16 #undef PACKAGE_TARNAME
17 #undef PACKAGE_VERSION
18 #include "mono/utils/mono-compiler.h"
20 #ifdef MONO_DEBUGGER_SUPPORTED
21 #include "include/libgc-mono-debugger.h"
22 #endif
24 #if DEBUG_THREADS
26 #ifndef NSIG
27 # if defined(MAXSIG)
28 # define NSIG (MAXSIG+1)
29 # elif defined(_NSIG)
30 # define NSIG _NSIG
31 # elif defined(__SIGRTMAX)
32 # define NSIG (__SIGRTMAX+1)
33 # else
34 --> please fix it
35 # endif
36 #endif
38 void GC_print_sig_mask()
40 sigset_t blocked;
41 int i;
43 if (pthread_sigmask(SIG_BLOCK, NULL, &blocked) != 0)
44 ABORT("pthread_sigmask");
45 GC_printf0("Blocked: ");
46 for (i = 1; i < NSIG; i++) {
47 if (sigismember(&blocked, i)) { GC_printf1("%ld ",(long) i); }
49 GC_printf0("\n");
52 #endif
54 /* Remove the signals that we want to allow in thread stopping */
55 /* handler from a set. */
56 void GC_remove_allowed_signals(sigset_t *set)
58 # ifdef NO_SIGNALS
59 if (sigdelset(set, SIGINT) != 0
60 || sigdelset(set, SIGQUIT) != 0
61 || sigdelset(set, SIGABRT) != 0
62 || sigdelset(set, SIGTERM) != 0) {
63 ABORT("sigdelset() failed");
65 # endif
67 # ifdef MPROTECT_VDB
68 /* Handlers write to the thread structure, which is in the heap, */
69 /* and hence can trigger a protection fault. */
70 if (sigdelset(set, SIGSEGV) != 0
71 # ifdef SIGBUS
72 || sigdelset(set, SIGBUS) != 0
73 # endif
74 ) {
75 ABORT("sigdelset() failed");
77 # endif
80 static sigset_t suspend_handler_mask;
82 word GC_stop_count; /* Incremented at the beginning of GC_stop_world. */
84 #ifdef GC_OSF1_THREADS
85 GC_bool GC_retry_signals = TRUE;
86 #else
87 GC_bool GC_retry_signals = FALSE;
88 #endif
/*
 * We use signals to stop threads during GC.
 *
 * Suspended threads wait in the signal handler for SIG_THR_RESTART.
 * That's more portable than semaphores or condition variables.
 * (We do use sem_post from a signal handler, but that should be portable.)
 *
 * The thread suspension signal SIG_SUSPEND is now defined in gc_priv.h.
 * Note that we can't just stop a thread; we need it to save its stack
 * pointer(s) and acknowledge.
 */
/* Signal used to resume a thread that is waiting in the suspend       */
/* handler.  May be predefined by the build; otherwise a real-time     */
/* signal is chosen on HP-UX/OSF1 and SIGXCPU elsewhere.  The          */
/* arithmetic expansions are parenthesized so the macro behaves as a   */
/* single value inside any larger expression.                          */
#ifndef SIG_THR_RESTART
#  if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS)
#    ifdef _SIGRTMIN
#      define SIG_THR_RESTART (_SIGRTMIN + 5)
#    else
#      define SIG_THR_RESTART (SIGRTMIN + 5)
#    endif
#  else
#    define SIG_THR_RESTART SIGXCPU
#  endif
#endif
114 sem_t GC_suspend_ack_sem;
116 static void _GC_suspend_handler(int sig)
118 int dummy;
119 pthread_t my_thread = pthread_self();
120 GC_thread me;
121 # ifdef PARALLEL_MARK
122 word my_mark_no = GC_mark_no;
123 /* Marker can't proceed until we acknowledge. Thus this is */
124 /* guaranteed to be the mark_no correspending to our */
125 /* suspension, i.e. the marker can't have incremented it yet. */
126 # endif
127 word my_stop_count = GC_stop_count;
129 if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");
131 #if DEBUG_THREADS
132 GC_printf1("Suspending 0x%lx\n", my_thread);
133 #endif
135 me = GC_lookup_thread(my_thread);
136 /* The lookup here is safe, since I'm doing this on behalf */
137 /* of a thread which holds the allocation lock in order */
138 /* to stop the world. Thus concurrent modification of the */
139 /* data structure is impossible. */
140 if (me -> stop_info.last_stop_count == my_stop_count) {
141 /* Duplicate signal. OK if we are retrying. */
142 if (!GC_retry_signals) {
143 WARN("Duplicate suspend signal in thread %lx\n",
144 pthread_self());
146 return;
148 # ifdef SPARC
149 me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
150 # else
151 me -> stop_info.stack_ptr = (ptr_t)(&dummy);
152 # endif
153 # ifdef IA64
154 me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
155 # endif
157 /* Tell the thread that wants to stop the world that this */
158 /* thread has been stopped. Note that sem_post() is */
159 /* the only async-signal-safe primitive in LinuxThreads. */
160 sem_post(&GC_suspend_ack_sem);
161 me -> stop_info.last_stop_count = my_stop_count;
163 /* Wait until that thread tells us to restart by sending */
164 /* this thread a SIG_THR_RESTART signal. */
165 /* SIG_THR_RESTART should be masked at this point. Thus there */
166 /* is no race. */
167 do {
168 me->stop_info.signal = 0;
169 sigsuspend(&suspend_handler_mask); /* Wait for signal */
170 } while (me->stop_info.signal != SIG_THR_RESTART);
171 /* If the RESTART signal gets lost, we can still lose. That should be */
172 /* less likely than losing the SUSPEND signal, since we don't do much */
173 /* between the sem_post and sigsuspend. */
174 /* We'd need more handshaking to work around that, since we don't want */
175 /* to accidentally leave a RESTART signal pending, thus causing us to */
176 /* continue prematurely in a future round. */
178 /* Tell the thread that wants to start the world that this */
179 /* thread has been started. Note that sem_post() is */
180 /* the only async-signal-safe primitive in LinuxThreads. */
181 sem_post(&GC_suspend_ack_sem);
184 #if DEBUG_THREADS
185 GC_printf1("Continuing 0x%lx\n", my_thread);
186 #endif
/* Signal handler entry point for SIG_SUSPEND.  Runs the real handler  */
/* with errno saved and restored, so the interrupted code never sees   */
/* an errno value changed by the handler.                              */
void GC_suspend_handler(int sig)
{
    const int saved_errno = errno;

    _GC_suspend_handler(sig);
    errno = saved_errno;
}
196 static void _GC_restart_handler(int sig)
198 pthread_t my_thread = pthread_self();
199 GC_thread me;
201 if (sig != SIG_THR_RESTART) ABORT("Bad signal in suspend_handler");
203 /* Let the GC_suspend_handler() know that we got a SIG_THR_RESTART. */
204 /* The lookup here is safe, since I'm doing this on behalf */
205 /* of a thread which holds the allocation lock in order */
206 /* to stop the world. Thus concurrent modification of the */
207 /* data structure is impossible. */
208 me = GC_lookup_thread(my_thread);
209 me->stop_info.signal = SIG_THR_RESTART;
212 ** Note: even if we didn't do anything useful here,
213 ** it would still be necessary to have a signal handler,
214 ** rather than ignoring the signals, otherwise
215 ** the signals will not be delivered at all, and
216 ** will thus not interrupt the sigsuspend() above.
219 #if DEBUG_THREADS
220 GC_printf1("In GC_restart_handler for 0x%lx\n", pthread_self());
221 #endif
224 # ifdef IA64
225 # define IF_IA64(x) x
226 # else
227 # define IF_IA64(x)
228 # endif
229 /* We hold allocation lock. Should do exactly the right thing if the */
230 /* world is stopped. Should not fail if it isn't. */
231 static void pthread_push_all_stacks()
233 GC_bool found_me = FALSE;
234 int i;
235 GC_thread p;
236 ptr_t lo, hi;
237 /* On IA64, we also need to scan the register backing store. */
238 IF_IA64(ptr_t bs_lo; ptr_t bs_hi;)
239 pthread_t me = pthread_self();
241 if (!GC_thr_initialized) GC_thr_init();
242 #if DEBUG_THREADS
243 GC_printf1("Pushing stacks from thread 0x%lx\n", (unsigned long) me);
244 #endif
245 for (i = 0; i < THREAD_TABLE_SZ; i++) {
246 for (p = GC_threads[i]; p != 0; p = p -> next) {
247 if (p -> flags & FINISHED) continue;
248 if (pthread_equal(p -> id, me)) {
249 # ifdef SPARC
250 lo = (ptr_t)GC_save_regs_in_stack();
251 # else
252 lo = GC_approx_sp();
253 # endif
254 found_me = TRUE;
255 IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();)
256 } else {
257 lo = p -> stop_info.stack_ptr;
258 IF_IA64(bs_hi = p -> backing_store_ptr;)
260 if ((p -> flags & MAIN_THREAD) == 0) {
261 hi = p -> stack_end;
262 IF_IA64(bs_lo = p -> backing_store_end);
263 } else {
264 /* The original stack. */
265 hi = GC_stackbottom;
266 IF_IA64(bs_lo = BACKING_STORE_BASE;)
268 #if DEBUG_THREADS
269 GC_printf3("Stack for thread 0x%lx = [%lx,%lx)\n",
270 (unsigned long) p -> id,
271 (unsigned long) lo, (unsigned long) hi);
272 #endif
273 if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n");
274 # ifdef STACK_GROWS_UP
275 /* We got them backwards! */
276 GC_push_all_stack(hi, lo);
277 # else
278 GC_push_all_stack(lo, hi);
279 # endif
280 # ifdef IA64
281 # if DEBUG_THREADS
282 GC_printf3("Reg stack for thread 0x%lx = [%lx,%lx)\n",
283 (unsigned long) p -> id,
284 (unsigned long) bs_lo, (unsigned long) bs_hi);
285 # endif
286 if (pthread_equal(p -> id, me)) {
287 GC_push_all_eager(bs_lo, bs_hi);
288 } else {
289 GC_push_all_stack(bs_lo, bs_hi);
291 # endif
294 if (!found_me && !GC_in_thread_creation)
295 ABORT("Collecting from unknown thread.");
/* Signal handler entry point for SIG_THR_RESTART.  Runs the real      */
/* handler with errno saved and restored around the call.              */
void GC_restart_handler(int sig)
{
    const int saved_errno = errno;

    _GC_restart_handler(sig);
    errno = saved_errno;
}
/* Public entry point for pushing all thread stacks; forwards to the   */
/* pthread implementation.  The caller holds the allocation lock.      */
/* Should do exactly the right thing if the world is stopped, and      */
/* should not fail if it isn't.                                        */
void GC_push_all_stacks()
{
    pthread_push_all_stacks();
}
312 /* There seems to be a very rare thread stopping problem. To help us */
313 /* debug that, we save the ids of the stopping thread. */
314 pthread_t GC_stopping_thread;
315 int GC_stopping_pid;
317 /* We hold the allocation lock. Suspend all threads that might */
318 /* still be running. Return the number of suspend signals that */
319 /* were sent. */
320 int GC_suspend_all()
322 int n_live_threads = 0;
323 int i;
324 GC_thread p;
325 int result;
326 pthread_t my_thread = pthread_self();
328 GC_stopping_thread = my_thread; /* debugging only. */
329 GC_stopping_pid = getpid(); /* debugging only. */
330 for (i = 0; i < THREAD_TABLE_SZ; i++) {
331 for (p = GC_threads[i]; p != 0; p = p -> next) {
332 if (p -> id != my_thread) {
333 if (p -> flags & FINISHED) continue;
334 if (p -> stop_info.last_stop_count == GC_stop_count) continue;
335 if (p -> thread_blocked) /* Will wait */ continue;
336 n_live_threads++;
337 #if DEBUG_THREADS
338 GC_printf1("Sending suspend signal to 0x%lx\n", p -> id);
339 #endif
341 result = pthread_kill(p -> id, SIG_SUSPEND);
342 switch(result) {
343 case ESRCH:
344 /* Not really there anymore. Possible? */
345 n_live_threads--;
346 break;
347 case 0:
348 break;
349 default:
350 ABORT("pthread_kill failed");
355 return n_live_threads;
358 /* Caller holds allocation lock. */
359 static void pthread_stop_world()
361 int i;
362 int n_live_threads;
363 int code;
365 #if DEBUG_THREADS
366 GC_printf1("Stopping the world from 0x%lx\n", pthread_self());
367 #endif
369 n_live_threads = GC_suspend_all();
371 if (GC_retry_signals) {
372 unsigned long wait_usecs = 0; /* Total wait since retry. */
373 # define WAIT_UNIT 3000
374 # define RETRY_INTERVAL 100000
375 for (;;) {
376 int ack_count;
378 sem_getvalue(&GC_suspend_ack_sem, &ack_count);
379 if (ack_count == n_live_threads) break;
380 if (wait_usecs > RETRY_INTERVAL) {
381 int newly_sent = GC_suspend_all();
383 # ifdef CONDPRINT
384 if (GC_print_stats) {
385 GC_printf1("Resent %ld signals after timeout\n",
386 newly_sent);
388 # endif
389 sem_getvalue(&GC_suspend_ack_sem, &ack_count);
390 if (newly_sent < n_live_threads - ack_count) {
391 WARN("Lost some threads during GC_stop_world?!\n",0);
392 n_live_threads = ack_count + newly_sent;
394 wait_usecs = 0;
396 usleep(WAIT_UNIT);
397 wait_usecs += WAIT_UNIT;
400 for (i = 0; i < n_live_threads; i++) {
401 while (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
402 if (errno != EINTR) {
403 GC_err_printf1("Sem_wait returned %ld\n", (unsigned long)code);
404 ABORT("sem_wait for handler failed");
408 #if DEBUG_THREADS
409 GC_printf1("World stopped from 0x%lx\n", pthread_self());
410 #endif
411 GC_stopping_thread = 0; /* debugging only */
414 /* Caller holds allocation lock. */
415 void GC_stop_world()
417 /* Make sure all free list construction has stopped before we start. */
418 /* No new construction can start, since free list construction is */
419 /* required to acquire and release the GC lock before it starts, */
420 /* and we have the lock. */
421 # ifdef PARALLEL_MARK
422 GC_acquire_mark_lock();
423 GC_ASSERT(GC_fl_builder_count == 0);
424 /* We should have previously waited for it to become zero. */
425 # endif /* PARALLEL_MARK */
426 ++GC_stop_count;
427 #ifdef MONO_DEBUGGER_SUPPORTED
428 if (gc_thread_vtable && gc_thread_vtable->stop_world)
429 gc_thread_vtable->stop_world ();
430 else
431 #endif
432 pthread_stop_world ();
433 # ifdef PARALLEL_MARK
434 GC_release_mark_lock();
435 # endif
438 /* Caller holds allocation lock, and has held it continuously since */
439 /* the world stopped. */
440 static void pthread_start_world()
442 pthread_t my_thread = pthread_self();
443 register int i;
444 register GC_thread p;
445 register int n_live_threads = 0;
446 register int result;
447 int code;
449 # if DEBUG_THREADS
450 GC_printf0("World starting\n");
451 # endif
453 for (i = 0; i < THREAD_TABLE_SZ; i++) {
454 for (p = GC_threads[i]; p != 0; p = p -> next) {
455 if (p -> id != my_thread) {
456 if (p -> flags & FINISHED) continue;
457 if (p -> thread_blocked) continue;
458 n_live_threads++;
459 #if DEBUG_THREADS
460 GC_printf1("Sending restart signal to 0x%lx\n", p -> id);
461 #endif
463 result = pthread_kill(p -> id, SIG_THR_RESTART);
464 switch(result) {
465 case ESRCH:
466 /* Not really there anymore. Possible? */
467 n_live_threads--;
468 break;
469 case 0:
470 break;
471 default:
472 ABORT("pthread_kill failed");
478 #if DEBUG_THREADS
479 GC_printf0 ("All threads signaled");
480 #endif
482 for (i = 0; i < n_live_threads; i++) {
483 while (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
484 if (errno != EINTR) {
485 GC_err_printf1("Sem_wait returned %ld\n", (unsigned long)code);
486 ABORT("sem_wait for handler failed");
491 #if DEBUG_THREADS
492 GC_printf0("World started\n");
493 #endif
/* Restart the world, through the debugger's start_world hook when one */
/* is installed and through pthread_start_world() otherwise.           */
void GC_start_world()
{
#   ifdef MONO_DEBUGGER_SUPPORTED
      if (gc_thread_vtable && gc_thread_vtable->start_world) {
          gc_thread_vtable->start_world();
      } else {
          pthread_start_world ();
      }
#   else
      pthread_start_world ();
#   endif
}
506 static void pthread_stop_init() {
507 struct sigaction act;
509 if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0)
510 ABORT("sem_init failed");
512 act.sa_flags = SA_RESTART;
513 if (sigfillset(&act.sa_mask) != 0) {
514 ABORT("sigfillset() failed");
516 GC_remove_allowed_signals(&act.sa_mask);
517 /* SIG_THR_RESTART is set in the resulting mask. */
518 /* It is unmasked by the handler when necessary. */
519 act.sa_handler = GC_suspend_handler;
520 if (sigaction(SIG_SUSPEND, &act, NULL) != 0) {
521 ABORT("Cannot set SIG_SUSPEND handler");
524 act.sa_handler = GC_restart_handler;
525 if (sigaction(SIG_THR_RESTART, &act, NULL) != 0) {
526 ABORT("Cannot set SIG_THR_RESTART handler");
529 /* Inititialize suspend_handler_mask. It excludes SIG_THR_RESTART. */
530 if (sigfillset(&suspend_handler_mask) != 0) ABORT("sigfillset() failed");
531 GC_remove_allowed_signals(&suspend_handler_mask);
532 if (sigdelset(&suspend_handler_mask, SIG_THR_RESTART) != 0)
533 ABORT("sigdelset() failed");
535 /* Check for GC_RETRY_SIGNALS. */
536 if (0 != GETENV("GC_RETRY_SIGNALS")) {
537 GC_retry_signals = TRUE;
539 if (0 != GETENV("GC_NO_RETRY_SIGNALS")) {
540 GC_retry_signals = FALSE;
542 # ifdef CONDPRINT
543 if (GC_print_stats && GC_retry_signals) {
544 GC_printf0("Will retry suspend signal if necessary.\n");
546 # endif
/* Initialize the thread-stopping machinery, through the debugger's    */
/* initialize hook when one is installed and through                   */
/* pthread_stop_init() otherwise.  We hold the allocation lock.        */
void GC_stop_init()
{
#   ifdef MONO_DEBUGGER_SUPPORTED
      if (gc_thread_vtable && gc_thread_vtable->initialize) {
          gc_thread_vtable->initialize ();
      } else {
          pthread_stop_init ();
      }
#   else
      pthread_stop_init ();
#   endif
}
560 #ifdef MONO_DEBUGGER_SUPPORTED
562 GCThreadFunctions *gc_thread_vtable = NULL;
564 void
565 GC_mono_debugger_add_all_threads (void)
567 GC_thread p;
568 int i;
570 if (gc_thread_vtable && gc_thread_vtable->thread_created) {
571 for (i = 0; i < THREAD_TABLE_SZ; i++) {
572 for (p = GC_threads[i]; p != 0; p = p -> next) {
573 gc_thread_vtable->thread_created (p->id, &p->stop_info.stack_ptr);
579 #endif
581 #endif