/* Linuxthreads - a simple clone()-based implementation of Posix        */
/* threads for Linux.                                                   */
/* Copyright (C) 1996 Xavier Leroy (Xavier.Leroy@inria.fr)              */
/*                                                                      */
/* This program is free software; you can redistribute it and/or        */
/* modify it under the terms of the GNU Library General Public License  */
/* as published by the Free Software Foundation; either version 2       */
/* of the License, or (at your option) any later version.               */
/*                                                                      */
/* This program is distributed in the hope that it will be useful,      */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
/* GNU Library General Public License for more details.                 */

/* The "thread manager" thread: manages creation and termination of threads */

#include <assert.h>
#include <errno.h>
#include <sched.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/poll.h>           /* for poll */
#include <sys/mman.h>           /* for mmap */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/wait.h>           /* for waitpid macros */

#include <ldsodefs.h>
#include "pthread.h"
#include "internals.h"
#include "spinlock.h"
#include "restart.h"
#include "semaphore.h"

/* Array of active threads. Entry 0 is reserved for the initial thread. */
struct pthread_handle_struct __pthread_handles[PTHREAD_THREADS_MAX]
#ifdef USE_TLS
# if __LT_SPINLOCK_INIT != 0
= {
  { __LOCK_INITIALIZER, NULL, 0},
  { __LOCK_INITIALIZER, NULL, 0},
  /* All NULLs */
}
# endif
#else
= {
  { __LOCK_INITIALIZER, &__pthread_initial_thread, 0},
  { __LOCK_INITIALIZER, &__pthread_manager_thread, 0},
  /* All NULLs */
}
#endif
;

/* For debugging purposes put the maximum number of threads in a variable. */
const int __linuxthreads_pthread_threads_max = PTHREAD_THREADS_MAX;

#ifndef THREAD_SELF
/* Indicate whether at least one thread has a user-defined stack (if 1),
   or if all threads have stacks supplied by LinuxThreads (if 0). */
int __pthread_nonstandard_stacks;
#endif
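
/* Note: handle slots 0 and 1 are permanently occupied by the initial
   thread and the manager thread (see the initializer above), so the
   count of active entries starts at 2 and user threads are allocated
   from slot 2 upward in pthread_handle_create. */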

/* Number of active entries in __pthread_handles (used by gdb) */
volatile int __pthread_handles_num = 2;

/* Whether to use debugger additional actions for thread creation
   (set to 1 by gdb) */
volatile int __pthread_threads_debug;

/* Globally enabled events.  */
volatile td_thr_events_t __pthread_threads_events;

/* Pointer to thread descriptor with last event. */
volatile pthread_descr __pthread_last_event;
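
/* The manager's own descriptor, saved when __pthread_manager starts so
   that the helper functions below can refer to it. */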
static pthread_descr manager_thread;

/* Mapping from stack segment to thread descriptor. */
/* Stack segment numbers are also indices into the __pthread_handles array. */
/* Stack segment number 0 is reserved for the initial thread. */

#if FLOATING_STACKS
# define thread_segment(seq) NULL
#else
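/* With fixed stacks, segment seg occupies a STACK_SIZE-sized region
   growing down from THREAD_STACK_START_ADDRESS, and the thread
   descriptor sits at the very top of that region. */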
static inline pthread_descr thread_segment(int seg)
{
  return (pthread_descr)(THREAD_STACK_START_ADDRESS - (seg - 1) * STACK_SIZE)
         - 1;
}
#endif

/* Flag set in signal handler to record child termination */

static volatile int terminated_children;

/* Flag set when the initial thread is blocked on pthread_exit waiting
   for all other threads to terminate */

static int main_thread_exiting;

/* Counter used to generate unique thread identifier.
   Thread identifier is pthread_threads_counter + segment. */

static pthread_t pthread_threads_counter;
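
/* The counter advances in steps of PTHREAD_THREADS_MAX while segment
   numbers stay below PTHREAD_THREADS_MAX, so tid % PTHREAD_THREADS_MAX
   recovers the handle slot and identifiers remain unique until the
   counter wraps around. */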

/* Forward declarations */

static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
                                 void * (*start_routine)(void *), void *arg,
                                 sigset_t *mask, int father_pid,
                                 int report_events,
                                 td_thr_events_t *event_maskp);
static void pthread_handle_free(pthread_t th_id);
static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
     __attribute__ ((noreturn));
static void pthread_reap_children(void);
static void pthread_kill_all_threads(int sig, int main_thread_also);
static void pthread_for_each_thread(void *arg,
                                    void (*fn)(void *, pthread_descr));

/* The server thread managing requests for thread creation and termination */

int
__attribute__ ((noreturn))
__pthread_manager(void *arg)
{
  pthread_descr self = manager_thread = arg;
  int reqfd = __pthread_manager_reader;
  struct pollfd ufd;
  sigset_t manager_mask;
  int n;
  struct pthread_request request;

  /* If we have special thread_self processing, initialize it.  */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(self, 1);
#endif
  /* Set the error variable.  */
  self->p_errnop = &self->p_errno;
  self->p_h_errnop = &self->p_h_errno;
  /* Block all signals except __pthread_sig_cancel and SIGTRAP */
  sigfillset(&manager_mask);
  sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
  sigdelset(&manager_mask, SIGTRAP);              /* for debugging purposes */
  if (__pthread_threads_debug && __pthread_sig_debug > 0)
    sigdelset(&manager_mask, __pthread_sig_debug);
  sigprocmask(SIG_SETMASK, &manager_mask, NULL);
  /* Raise our priority to match that of main thread */
  __pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
  /* Synchronize debugging of the thread manager */
  n = TEMP_FAILURE_RETRY(__libc_read(reqfd, (char *)&request,
                                     sizeof(request)));
  ASSERT(n == sizeof(request) && request.req_kind == REQ_DEBUG);
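  /* The first request is always REQ_DEBUG; the thread that spawned the
     manager writes it once any debugger setup is complete, so no
     request is served before that point. */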
  ufd.fd = reqfd;
  ufd.events = POLLIN;
  /* Enter server loop */
  while(1) {
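    /* Wake up at least every two seconds even when idle, so the checks
       below run without needing an explicit request. */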
    n = __poll(&ufd, 1, 2000);

    /* Check for termination of the main thread */
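    /* The manager is a child of the initial thread; being reparented to
       init (getppid() == 1) means the initial thread has died. */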
    if (getppid() == 1) {
      pthread_kill_all_threads(SIGKILL, 0);
      _exit(0);
    }
    /* Check for dead children */
    if (terminated_children) {
      terminated_children = 0;
      pthread_reap_children();
    }
    /* Read and execute request */
    if (n == 1 && (ufd.revents & POLLIN)) {
      n = TEMP_FAILURE_RETRY(__libc_read(reqfd, (char *)&request,
                                         sizeof(request)));
#ifdef DEBUG
      if (n < 0) {
        char d[64];
        write(STDERR_FILENO, d, snprintf(d, sizeof(d), "*** read err %m\n"));
      } else if (n != sizeof(request)) {
        write(STDERR_FILENO, "*** short read in manager\n", 26);
      }
#endif

      switch(request.req_kind) {
      case REQ_CREATE:
        request.req_thread->p_retcode =
          pthread_handle_create((pthread_t *) &request.req_thread->p_retval,
                                request.req_args.create.attr,
                                request.req_args.create.fn,
                                request.req_args.create.arg,
                                &request.req_args.create.mask,
                                request.req_thread->p_pid,
                                request.req_thread->p_report_events,
                                &request.req_thread->p_eventbuf.eventmask);
        restart(request.req_thread);
        break;
      case REQ_FREE:
        pthread_handle_free(request.req_args.free.thread_id);
        break;
      case REQ_PROCESS_EXIT:
        pthread_handle_exit(request.req_thread,
                            request.req_args.exit.code);
        /* NOTREACHED */
        break;
      case REQ_MAIN_THREAD_EXIT:
        main_thread_exiting = 1;
        /* Reap children in case all other threads died and the signal handler
           went off before we set main_thread_exiting to 1, and therefore did
           not do REQ_KICK. */
        pthread_reap_children();

        if (__pthread_main_thread->p_nextlive == __pthread_main_thread) {
          restart(__pthread_main_thread);
          /* The main thread will now call exit(), which will trigger an
             __on_exit handler, which in turn will send REQ_PROCESS_EXIT
             to the thread manager.  That is how the manager eventually
             terminates from this loop. */
        }
        break;
      case REQ_POST:
        __new_sem_post(request.req_args.post);
        break;
      case REQ_DEBUG:
        /* Make gdb aware of new thread and gdb will restart the
           new thread when it is ready to handle the new thread. */
        if (__pthread_threads_debug && __pthread_sig_debug > 0)
          raise(__pthread_sig_debug);
        break;
      case REQ_KICK:
        /* This is just a prod to get the manager to reap some
           threads right away, avoiding a potential delay at shutdown. */
        break;
      case REQ_FOR_EACH_THREAD:
        pthread_for_each_thread(request.req_args.for_each.arg,
                                request.req_args.for_each.fn);
        restart(request.req_thread);
        break;
      }
    }
  }
}

int __pthread_manager_event(void *arg)
{
  /* If we have special thread_self processing, initialize it.  */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(arg, 1);
#endif

  /* Get the lock the manager will free once all is correctly set up.  */
  __pthread_lock (THREAD_GETMEM(((pthread_descr) arg), p_lock), NULL);
  /* Free it immediately.  */
  __pthread_unlock (THREAD_GETMEM(((pthread_descr) arg), p_lock));

  return __pthread_manager(arg);
}

/* Process creation */

static int
__attribute__ ((noreturn))
pthread_start_thread(void *arg)
{
  pthread_descr self = (pthread_descr) arg;
  struct pthread_request request;
  void * outcome;
#if HP_TIMING_AVAIL
  hp_timing_t tmpclock;
#endif
  /* Initialize special thread_self processing, if any.  */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(self, self->p_nr);
#endif
#if HP_TIMING_AVAIL
  HP_TIMING_NOW (tmpclock);
  THREAD_SETMEM (self, p_cpuclock_offset, tmpclock);
#endif
  /* Make sure our pid field is initialized, just in case we get there
     before our father has initialized it. */
  THREAD_SETMEM(self, p_pid, __getpid());
  /* Initial signal mask is that of the creating thread. (Otherwise,
     we'd just inherit the mask of the thread manager.) */
  sigprocmask(SIG_SETMASK, &self->p_start_args.mask, NULL);
  /* Set the scheduling policy and priority for the new thread, if needed */
  if (THREAD_GETMEM(self, p_start_args.schedpolicy) >= 0)
    /* Explicit scheduling attributes were provided: apply them */
    __sched_setscheduler(THREAD_GETMEM(self, p_pid),
                         THREAD_GETMEM(self, p_start_args.schedpolicy),
                         &self->p_start_args.schedparam);
  else if (manager_thread->p_priority > 0)
    /* Default scheduling required, but thread manager runs in realtime
       scheduling: switch new thread to SCHED_OTHER policy */
    {
      struct sched_param default_params;
      default_params.sched_priority = 0;
      __sched_setscheduler(THREAD_GETMEM(self, p_pid),
                           SCHED_OTHER, &default_params);
    }
  /* Make gdb aware of new thread */
  if (__pthread_threads_debug && __pthread_sig_debug > 0) {
    request.req_thread = self;
    request.req_kind = REQ_DEBUG;
    TEMP_FAILURE_RETRY(__libc_write(__pthread_manager_request,
                                    (char *) &request, sizeof(request)));
    suspend(self);
  }
  /* Run the thread code */
  outcome = self->p_start_args.start_routine(THREAD_GETMEM(self,
                                                           p_start_args.arg));
  /* Exit with the given return value */
  __pthread_do_exit(outcome, CURRENT_STACK_FRAME);
}

static int
__attribute__ ((noreturn))
pthread_start_thread_event(void *arg)
{
  pthread_descr self = (pthread_descr) arg;

#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(self, self->p_nr);
#endif
  /* Make sure our pid field is initialized, just in case we get there
     before our father has initialized it. */
  THREAD_SETMEM(self, p_pid, __getpid());
  /* Get the lock the manager will free once all is correctly set up.  */
  __pthread_lock (THREAD_GETMEM(self, p_lock), NULL);
  /* Free it immediately.  */
  __pthread_unlock (THREAD_GETMEM(self, p_lock));

  /* Continue with the real function.  */
  pthread_start_thread (arg);
}

#if defined USE_TLS && !FLOATING_STACKS
# error "TLS can only work with floating stacks"
#endif

static int pthread_allocate_stack(const pthread_attr_t *attr,
                                  pthread_descr default_new_thread,
                                  int pagesize,
                                  char ** out_new_thread,
                                  char ** out_new_thread_bottom,
                                  char ** out_guardaddr,
                                  size_t * out_guardsize)
{
  pthread_descr new_thread;
  char * new_thread_bottom;
  char * guardaddr;
  size_t stacksize, guardsize;

#ifdef USE_TLS
  /* TLS cannot work with fixed thread descriptor addresses.  */
  assert (default_new_thread == NULL);
#endif

  if (attr != NULL && attr->__stackaddr_set)
    {
#ifdef _STACK_GROWS_UP
      /* The user provided a stack. */
# ifdef USE_TLS
      /* This value is not needed.  */
      new_thread = (pthread_descr) attr->__stackaddr;
      new_thread_bottom = (char *) new_thread;
# else
      new_thread = (pthread_descr) attr->__stackaddr;
      new_thread_bottom = (char *) (new_thread + 1);
# endif
      guardaddr = attr->__stackaddr + attr->__stacksize;
      guardsize = 0;
#else
      /* The user provided a stack.  For now we interpret the supplied
         address as 1 + the highest addr. in the stack segment.  If a
         separate register stack is needed, we place it at the low end
         of the segment, relying on the associated stacksize to
         determine the low end of the segment.  This differs from many
         (but not all) other pthreads implementations.  The intent is
         that on machines with a single stack growing toward higher
         addresses, stackaddr would be the lowest address in the stack
         segment, so that it is consistently close to the initial sp
         value. */
# ifdef USE_TLS
      new_thread = (pthread_descr) attr->__stackaddr;
# else
      new_thread =
        (pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
# endif
      new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
      guardaddr = new_thread_bottom;
      guardsize = 0;
#endif
#ifndef THREAD_SELF
      __pthread_nonstandard_stacks = 1;
#endif
#ifndef USE_TLS
      /* Clear the thread data structure.  */
      memset (new_thread, '\0', sizeof (*new_thread));
#endif
    }
  else
    {
#ifdef NEED_SEPARATE_REGISTER_STACK
      const size_t granularity = 2 * pagesize;
      /* Try to make stacksize/2 a multiple of pagesize */
#else
      const size_t granularity = pagesize;
#endif
      void *map_addr;

      /* Allocate space for stack and thread descriptor at default address */
#if FLOATING_STACKS
      if (attr != NULL)
        {
          guardsize = page_roundup (attr->__guardsize, granularity);
          stacksize = __pthread_max_stacksize - guardsize;
          stacksize = MIN (stacksize,
                           page_roundup (attr->__stacksize, granularity));
        }
      else
        {
          guardsize = granularity;
          stacksize = __pthread_max_stacksize - guardsize;
        }
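
      /* Passing a NULL hint lets the kernel pick the address, which is
         what makes these stacks "floating". */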
      map_addr = mmap(NULL, stacksize + guardsize,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (map_addr == MAP_FAILED)
        /* No more memory available.  */
        return -1;

# ifdef NEED_SEPARATE_REGISTER_STACK
      guardaddr = map_addr + stacksize / 2;
      if (guardsize > 0)
        mprotect (guardaddr, guardsize, PROT_NONE);

      new_thread_bottom = (char *) map_addr;
#  ifdef USE_TLS
      new_thread = ((pthread_descr) (new_thread_bottom + stacksize
                                     + guardsize));
#  else
      new_thread = ((pthread_descr) (new_thread_bottom + stacksize
                                     + guardsize)) - 1;
#  endif
# elif _STACK_GROWS_DOWN
      guardaddr = map_addr;
      if (guardsize > 0)
        mprotect (guardaddr, guardsize, PROT_NONE);

      new_thread_bottom = (char *) map_addr + guardsize;
#  ifdef USE_TLS
      new_thread = ((pthread_descr) (new_thread_bottom + stacksize));
#  else
      new_thread = ((pthread_descr) (new_thread_bottom + stacksize)) - 1;
#  endif
# elif _STACK_GROWS_UP
      guardaddr = map_addr + stacksize;
      if (guardsize > 0)
        mprotect (guardaddr, guardsize, PROT_NONE);

      new_thread = (pthread_descr) map_addr;
#  ifdef USE_TLS
      new_thread_bottom = (char *) new_thread;
#  else
      new_thread_bottom = (char *) (new_thread + 1);
#  endif
# else
#  error You must define a stack direction
# endif /* Stack direction */
#else /* !FLOATING_STACKS */
      void *res_addr;

      if (attr != NULL)
        {
          guardsize = page_roundup (attr->__guardsize, granularity);
          stacksize = STACK_SIZE - guardsize;
          stacksize = MIN (stacksize,
                           page_roundup (attr->__stacksize, granularity));
        }
      else
        {
          guardsize = granularity;
          stacksize = STACK_SIZE - granularity;
        }

# ifdef NEED_SEPARATE_REGISTER_STACK
      new_thread = default_new_thread;
      new_thread_bottom = (char *) (new_thread + 1) - stacksize - guardsize;
      /* Includes guard area, unlike the normal case.  Use the bottom
         end of the segment as backing store for the register stack.
         Needed on IA64.  In this case, we also map the entire stack at
         once.  According to David Mosberger, that's cheaper.  It also
         avoids the risk of intermittent failures due to other mappings
         in the same region.  The cost is that we might be able to map
         slightly fewer stacks. */

      /* First the main stack: */
      map_addr = (caddr_t)((char *)(new_thread + 1) - stacksize / 2);
      res_addr = mmap(map_addr, stacksize / 2,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (res_addr != map_addr)
        {
          /* Bad luck, this segment is already mapped. */
          if (res_addr != MAP_FAILED)
            munmap(res_addr, stacksize / 2);
          return -1;
        }
      /* Then the register stack: */
      map_addr = (caddr_t)new_thread_bottom;
      res_addr = mmap(map_addr, stacksize/2,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (res_addr != map_addr)
        {
          if (res_addr != MAP_FAILED)
            munmap(res_addr, stacksize / 2);
          munmap((caddr_t)((char *)(new_thread + 1) - stacksize/2),
                 stacksize/2);
          return -1;
        }

      guardaddr = new_thread_bottom + stacksize/2;
      /* We leave the guard area in the middle unmapped. */
# else /* !NEED_SEPARATE_REGISTER_STACK */
#  ifdef _STACK_GROWS_DOWN
      new_thread = default_new_thread;
      new_thread_bottom = (char *) (new_thread + 1) - stacksize;
      map_addr = new_thread_bottom - guardsize;
      res_addr = mmap(map_addr, stacksize + guardsize,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (res_addr != map_addr)
        {
          /* Bad luck, this segment is already mapped. */
          if (res_addr != MAP_FAILED)
            munmap (res_addr, stacksize + guardsize);
          return -1;
        }

      /* We managed to get a stack.  Protect the guard area pages if
         necessary. */
      guardaddr = map_addr;
      if (guardsize > 0)
        mprotect (guardaddr, guardsize, PROT_NONE);
#  else
      /* The thread description goes at the bottom of this area, and
       * the stack starts directly above it.
       */
      new_thread = (pthread_descr)((unsigned long)default_new_thread &~ (STACK_SIZE - 1));
      map_addr = mmap(new_thread, stacksize + guardsize,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (map_addr == MAP_FAILED)
        return -1;

      new_thread_bottom = map_addr + sizeof(*new_thread);
      guardaddr = map_addr + stacksize;
      if (guardsize > 0)
        mprotect (guardaddr, guardsize, PROT_NONE);

#  endif /* stack direction */
# endif /* !NEED_SEPARATE_REGISTER_STACK */
#endif /* !FLOATING_STACKS */
    }
  *out_new_thread = (char *) new_thread;
  *out_new_thread_bottom = new_thread_bottom;
  *out_guardaddr = guardaddr;
  *out_guardsize = guardsize;
  return 0;
}

static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
                                 void * (*start_routine)(void *), void *arg,
                                 sigset_t * mask, int father_pid,
                                 int report_events,
                                 td_thr_events_t *event_maskp)
{
  size_t sseg;
  int pid;
  pthread_descr new_thread;
  char *stack_addr;
  char * new_thread_bottom;
  pthread_t new_thread_id;
  char *guardaddr = NULL;
  size_t guardsize = 0;
  int pagesize = __getpagesize();
  int saved_errno = 0;

#ifdef USE_TLS
  new_thread = _dl_allocate_tls ();
  if (new_thread == NULL)
    return EAGAIN;
#else
  /* Prevent warnings. */
  new_thread = NULL;
#endif

  /* First check whether we have to change the policy and if yes, whether
     we can do this.  Normally this should be done by examining the
     return value of the __sched_setscheduler call in pthread_start_thread
     but this is hard to implement.  FIXME  */
  if (attr != NULL && attr->__schedpolicy != SCHED_OTHER && geteuid () != 0)
    return EPERM;
  /* Find a free segment for the thread, and allocate a stack if needed */
  for (sseg = 2; ; sseg++)
    {
      if (sseg >= PTHREAD_THREADS_MAX)
        {
#ifdef USE_TLS
          _dl_deallocate_tls (new_thread);
#endif
          return EAGAIN;
        }
      if (__pthread_handles[sseg].h_descr != NULL)
        continue;
      if (pthread_allocate_stack(attr, thread_segment(sseg),
                                 pagesize, &stack_addr, &new_thread_bottom,
                                 &guardaddr, &guardsize) == 0)
        {
#ifdef USE_TLS
          new_thread->p_stackaddr = stack_addr;
#else
          new_thread = (pthread_descr) stack_addr;
#endif
          break;
        }
    }
  __pthread_handles_num++;
  /* Allocate new thread identifier */
  pthread_threads_counter += PTHREAD_THREADS_MAX;
  new_thread_id = sseg + pthread_threads_counter;
  /* Initialize the thread descriptor.  Elements which have to be
     initialized to zero already have this value. */
  new_thread->p_tid = new_thread_id;
  new_thread->p_lock = &(__pthread_handles[sseg].h_lock);
  new_thread->p_cancelstate = PTHREAD_CANCEL_ENABLE;
  new_thread->p_canceltype = PTHREAD_CANCEL_DEFERRED;
  new_thread->p_errnop = &new_thread->p_errno;
  new_thread->p_h_errnop = &new_thread->p_h_errno;
  new_thread->p_resp = &new_thread->p_res;
  new_thread->p_guardaddr = guardaddr;
  new_thread->p_guardsize = guardsize;
  new_thread->p_header.data.self = new_thread;
  new_thread->p_nr = sseg;
  new_thread->p_inheritsched = attr ? attr->__inheritsched : 0;
  /* Initialize the thread handle */
  __pthread_init_lock(&__pthread_handles[sseg].h_lock);
  __pthread_handles[sseg].h_descr = new_thread;
  __pthread_handles[sseg].h_bottom = new_thread_bottom;
  /* Determine scheduling parameters for the thread */
  new_thread->p_start_args.schedpolicy = -1;
  if (attr != NULL) {
    new_thread->p_detached = attr->__detachstate;
    new_thread->p_userstack = attr->__stackaddr_set;

    switch(attr->__inheritsched) {
    case PTHREAD_EXPLICIT_SCHED:
      new_thread->p_start_args.schedpolicy = attr->__schedpolicy;
      memcpy (&new_thread->p_start_args.schedparam, &attr->__schedparam,
              sizeof (struct sched_param));
      break;
    case PTHREAD_INHERIT_SCHED:
      new_thread->p_start_args.schedpolicy = __sched_getscheduler(father_pid);
      __sched_getparam(father_pid, &new_thread->p_start_args.schedparam);
      break;
    }
    new_thread->p_priority =
      new_thread->p_start_args.schedparam.sched_priority;
  }
  /* Finish setting up arguments to pthread_start_thread */
  new_thread->p_start_args.start_routine = start_routine;
  new_thread->p_start_args.arg = arg;
  new_thread->p_start_args.mask = *mask;
  /* Make the new thread ID available already now.  If any of the later
     functions fail we return an error value and the caller must not use
     the stored thread ID. */
  *thread = new_thread_id;
  /* Raise priority of thread manager if needed */
  __pthread_manager_adjust_prio(new_thread->p_priority);
  /* Do the cloning.  We have to use two different functions depending
     on whether we are debugging or not. */
  pid = 0;     /* Note that the thread never can have PID zero. */
  if (report_events)
    {
      /* See whether the TD_CREATE event bit is set in any of the
         masks. */
      int idx = __td_eventword (TD_CREATE);
      uint32_t mask = __td_eventmask (TD_CREATE);

      if ((mask & (__pthread_threads_events.event_bits[idx]
                   | event_maskp->event_bits[idx])) != 0)
        {
          /* Lock the mutex the child will use now so that it will stop. */
          __pthread_lock(new_thread->p_lock, NULL);

          /* We have to report this event. */
#ifdef NEED_SEPARATE_REGISTER_STACK
          /* Perhaps this version should be used on all platforms. But
             this requires that __clone2 be uniformly supported
             everywhere.

             And there is some argument for changing the __clone2
             interface to pass sp and bsp instead, making it more IA64
             specific, but allowing stacks to grow outward from each
             other, to get less paging and fewer mmaps. */
          pid = __clone2(pthread_start_thread_event,
                         (void **)new_thread_bottom,
                         (char *)new_thread - new_thread_bottom,
                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                         __pthread_sig_cancel, new_thread);
#elif _STACK_GROWS_UP
          pid = __clone(pthread_start_thread_event,
                        (void **) new_thread_bottom,
                        CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                        __pthread_sig_cancel, new_thread);
#else
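          /* On a downward-growing stack the child starts at the thread
             descriptor itself, which sits at the top of the stack area. */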
          pid = __clone(pthread_start_thread_event, (void **) new_thread,
                        CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                        __pthread_sig_cancel, new_thread);
#endif
          saved_errno = errno;
          if (pid != -1)
            {
              /* Now fill in the information about the new thread in
                 the newly created thread's data structure.  We cannot let
                 the new thread do this since we don't know whether it was
                 already scheduled when we send the event. */
              new_thread->p_eventbuf.eventdata = new_thread;
              new_thread->p_eventbuf.eventnum = TD_CREATE;
              __pthread_last_event = new_thread;

              /* We have to set the PID here since the callback function
                 in the debug library will need it and we cannot guarantee
                 the child got scheduled before the debugger. */
              new_thread->p_pid = pid;

              /* Now call the function which signals the event. */
              __linuxthreads_create_event ();

              /* Now restart the thread. */
              __pthread_unlock(new_thread->p_lock);
            }
        }
    }
  if (pid == 0)
    {
#ifdef NEED_SEPARATE_REGISTER_STACK
      pid = __clone2(pthread_start_thread,
                     (void **)new_thread_bottom,
                     (char *)stack_addr - new_thread_bottom,
                     CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                     __pthread_sig_cancel, new_thread);
#elif _STACK_GROWS_UP
      pid = __clone(pthread_start_thread, (void *) new_thread_bottom,
                    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                    __pthread_sig_cancel, new_thread);
#else
      pid = __clone(pthread_start_thread, stack_addr,
                    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                    __pthread_sig_cancel, new_thread);
#endif /* !NEED_SEPARATE_REGISTER_STACK */
      saved_errno = errno;
    }
  /* Check if cloning succeeded */
  if (pid == -1) {
    /* Free the stack if we allocated it */
    if (attr == NULL || !attr->__stackaddr_set)
      {
#ifdef NEED_SEPARATE_REGISTER_STACK
        size_t stacksize = ((char *)(new_thread->p_guardaddr)
                            - new_thread_bottom);
        munmap((caddr_t)new_thread_bottom,
               2 * stacksize + new_thread->p_guardsize);
#elif _STACK_GROWS_UP
# ifdef USE_TLS
        size_t stacksize = guardaddr - stack_addr;
        munmap(stack_addr, stacksize + guardsize);
# else
        size_t stacksize = guardaddr - (char *)new_thread;
        munmap(new_thread, stacksize + guardsize);
# endif
#else
# ifdef USE_TLS
        size_t stacksize = stack_addr - new_thread_bottom;
# else
        size_t stacksize = (char *)(new_thread+1) - new_thread_bottom;
# endif
        munmap(new_thread_bottom - guardsize, guardsize + stacksize);
#endif
      }
#ifdef USE_TLS
    _dl_deallocate_tls (new_thread);
#endif
    __pthread_handles[sseg].h_descr = NULL;
    __pthread_handles[sseg].h_bottom = NULL;
    __pthread_handles_num--;
    return saved_errno;
  }
  /* Insert new thread in doubly linked list of active threads */
  new_thread->p_prevlive = __pthread_main_thread;
  new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
  __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
  __pthread_main_thread->p_nextlive = new_thread;
  /* Set pid field of the new thread, in case we get there before the
     child starts. */
  new_thread->p_pid = pid;
  return 0;
}

/* Try to free the resources of a thread when requested by pthread_join
   or pthread_detach on a terminated thread. */

static void pthread_free(pthread_descr th)
{
  pthread_handle handle;
  pthread_readlock_info *iter, *next;

  ASSERT(th->p_exited);
  /* Make the handle invalid */
  handle = thread_handle(th->p_tid);
  __pthread_lock(&handle->h_lock, NULL);
  handle->h_descr = NULL;
  handle->h_bottom = (char *)(-1L);
  __pthread_unlock(&handle->h_lock);
#ifdef FREE_THREAD
  FREE_THREAD(th, th->p_nr);
#endif
  /* One fewer thread in __pthread_handles */
  __pthread_handles_num--;

  /* Destroy read lock list, and list of free read lock structures.
     If the former is not empty, it means the thread exited while
     holding read locks! */

  for (iter = th->p_readlock_list; iter != NULL; iter = next)
    {
      next = iter->pr_next;
      free(iter);
    }

  for (iter = th->p_readlock_free; iter != NULL; iter = next)
    {
      next = iter->pr_next;
      free(iter);
    }

  /* If the thread had a user-supplied stack (as the initial thread
     does), there is nothing to free. */
  if (!th->p_userstack)
    {
      size_t guardsize = th->p_guardsize;
      /* Free the stack and thread descriptor area */
      char *guardaddr = th->p_guardaddr;
#ifdef _STACK_GROWS_UP
# ifdef USE_TLS
      size_t stacksize = guardaddr - th->p_stackaddr;
# else
      size_t stacksize = guardaddr - (char *)th;
# endif
      guardaddr = (char *)th;
#else
      /* Guardaddr is always set, even if guardsize is 0.  This allows
         us to compute everything else. */
# ifdef USE_TLS
      size_t stacksize = th->p_stackaddr - guardaddr - guardsize;
# else
      size_t stacksize = (char *)(th+1) - guardaddr - guardsize;
# endif
# ifdef NEED_SEPARATE_REGISTER_STACK
      /* Take account of the register stack, which is below guardaddr. */
      guardaddr -= stacksize;
      stacksize *= 2;
# endif
#endif
      /* Unmap the stack. */
      munmap(guardaddr, stacksize + guardsize);

#ifdef USE_TLS
      _dl_deallocate_tls (th);
#endif
    }
}

/* Handle threads that have exited */

static void pthread_exited(pid_t pid)
{
  pthread_descr th;
  int detached;
  /* Find thread with that pid */
  for (th = __pthread_main_thread->p_nextlive;
       th != __pthread_main_thread;
       th = th->p_nextlive) {
    if (th->p_pid == pid) {
      /* Remove thread from list of active threads */
      th->p_nextlive->p_prevlive = th->p_prevlive;
      th->p_prevlive->p_nextlive = th->p_nextlive;
      /* Mark thread as exited, and if detached, free its resources */
      __pthread_lock(th->p_lock, NULL);
      th->p_exited = 1;
      /* If we have to signal this event do it now. */
      if (th->p_report_events)
        {
          /* See whether TD_REAP is set in any of the masks. */
          int idx = __td_eventword (TD_REAP);
          uint32_t mask = __td_eventmask (TD_REAP);

          if ((mask & (__pthread_threads_events.event_bits[idx]
                       | th->p_eventbuf.eventmask.event_bits[idx])) != 0)
            {
              /* Yes, we have to signal the reap event. */
              th->p_eventbuf.eventnum = TD_REAP;
              th->p_eventbuf.eventdata = th;
              __pthread_last_event = th;

              /* Now call the function to signal the event. */
              __linuxthreads_reap_event();
            }
        }
      detached = th->p_detached;
      __pthread_unlock(th->p_lock);
      if (detached)
        pthread_free(th);
      break;
    }
  }
  /* If all threads have exited and the main thread is pending on a
     pthread_exit, wake up the main thread and terminate ourselves. */
  if (main_thread_exiting &&
      __pthread_main_thread->p_nextlive == __pthread_main_thread) {
    restart(__pthread_main_thread);
    /* Same logic as REQ_MAIN_THREAD_EXIT. */
  }
}

static void pthread_reap_children(void)
{
  pid_t pid;
  int status;
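
  /* __WCLONE is needed to wait for children created by clone() that do
     not report their termination with SIGCHLD, which is the case for
     LinuxThreads threads (they use __pthread_sig_cancel instead). */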
  while ((pid = __libc_waitpid(-1, &status, WNOHANG | __WCLONE)) > 0) {
    pthread_exited(pid);
    if (WIFSIGNALED(status)) {
      /* If a thread died due to a signal, send the same signal to
         all other threads, including the main thread. */
      pthread_kill_all_threads(WTERMSIG(status), 1);
      _exit(0);
    }
  }
}

/* Try to free the resources of a thread when requested by pthread_join
   or pthread_detach on a terminated thread. */

static void pthread_handle_free(pthread_t th_id)
{
  pthread_handle handle = thread_handle(th_id);
  pthread_descr th;

  __pthread_lock(&handle->h_lock, NULL);
  if (nonexisting_handle(handle, th_id)) {
    /* pthread_reap_children has deallocated the thread already,
       nothing needs to be done */
    __pthread_unlock(&handle->h_lock);
    return;
  }
  th = handle->h_descr;
  if (th->p_exited) {
    __pthread_unlock(&handle->h_lock);
    pthread_free(th);
  } else {
    /* The Unix process of the thread is still running.
       Mark the thread as detached so that the thread manager will
       deallocate its resources when the Unix process exits. */
    th->p_detached = 1;
    __pthread_unlock(&handle->h_lock);
  }
}

/* Send a signal to all running threads */

static void pthread_kill_all_threads(int sig, int main_thread_also)
{
  pthread_descr th;
  for (th = __pthread_main_thread->p_nextlive;
       th != __pthread_main_thread;
       th = th->p_nextlive) {
    kill(th->p_pid, sig);
  }
  if (main_thread_also) {
    kill(__pthread_main_thread->p_pid, sig);
  }
}

static void pthread_for_each_thread(void *arg,
    void (*fn)(void *, pthread_descr))
{
  pthread_descr th;

  for (th = __pthread_main_thread->p_nextlive;
       th != __pthread_main_thread;
       th = th->p_nextlive) {
    fn(arg, th);
  }

  fn(arg, __pthread_main_thread);
}

/* Process-wide exit() */

static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
{
  pthread_descr th;
  __pthread_exit_requested = 1;
  __pthread_exit_code = exitcode;
  /* A forced asynchronous cancellation follows.  Make sure we won't
     get stuck later in the main thread with a system lock being held
     by one of the cancelled threads.  Ideally one would use the same
     code as in pthread_atfork(), but we can't distinguish system and
     user handlers there. */
  __flockfilelist();
  /* Send the CANCEL signal to all running threads, including the main
     thread, but excluding the thread from which the exit request originated
     (that thread must complete the exit, e.g. calling atexit functions
     and flushing stdio buffers). */
  for (th = issuing_thread->p_nextlive;
       th != issuing_thread;
       th = th->p_nextlive) {
    kill(th->p_pid, __pthread_sig_cancel);
  }
  /* Now, wait for all these threads, so that they don't become zombies
     and their times are properly added to the thread manager's times. */
  for (th = issuing_thread->p_nextlive;
       th != issuing_thread;
       th = th->p_nextlive) {
    waitpid(th->p_pid, NULL, __WCLONE);
  }
  __fresetlockfiles();
  restart(issuing_thread);
  _exit(0);
}

/* Handler for __pthread_sig_cancel in thread manager thread */

void __pthread_manager_sighandler(int sig)
{
  int kick_manager = terminated_children == 0 && main_thread_exiting;
  terminated_children = 1;

  /* If the main thread is terminating, kick the thread manager loop
     each time some threads terminate.  This eliminates a two second
     shutdown delay caused by the thread manager sleeping in the
     call to __poll().  Instead, the thread manager is kicked into
     action, reaps the outstanding threads and resumes the main thread
     so that it can complete the shutdown. */

  if (kick_manager) {
    struct pthread_request request;
    request.req_thread = 0;
    request.req_kind = REQ_KICK;
    TEMP_FAILURE_RETRY(__libc_write(__pthread_manager_request,
                                    (char *) &request, sizeof(request)));
  }
}

/* Adjust the priority of the thread manager so that it always runs at a
   priority higher than that of any thread. */

void __pthread_manager_adjust_prio(int thread_prio)
{
  struct sched_param param;

  if (thread_prio <= manager_thread->p_priority) return;
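  /* Run one priority level above the highest-priority user thread,
     unless that thread is already at the SCHED_FIFO maximum. */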
  param.sched_priority =
    thread_prio < __sched_get_priority_max(SCHED_FIFO)
    ? thread_prio + 1 : thread_prio;
  __sched_setscheduler(manager_thread->p_pid, SCHED_FIFO, &param);
  manager_thread->p_priority = thread_prio;
}