/* Linuxthreads - a simple clone()-based implementation of Posix        */
/* threads for Linux.                                                   */
/* Copyright (C) 1996 Xavier Leroy (Xavier.Leroy@inria.fr)              */
/*                                                                      */
/* This program is free software; you can redistribute it and/or        */
/* modify it under the terms of the GNU Library General Public License  */
/* as published by the Free Software Foundation; either version 2       */
/* of the License, or (at your option) any later version.               */
/*                                                                      */
/* This program is distributed in the hope that it will be useful,      */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
/* GNU Library General Public License for more details.                 */

/* The "thread manager" thread: manages creation and termination of threads */
#include <errno.h>
#include <sched.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/poll.h>   /* for poll */
#include <sys/mman.h>   /* for mmap */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/wait.h>   /* for waitpid macros */

#include "pthread.h"
#include "internals.h"
#include "spinlock.h"
#include "restart.h"
#include "semaphore.h"
/* Array of active threads. Entry 0 is reserved for the initial thread. */
struct pthread_handle_struct __pthread_handles[PTHREAD_THREADS_MAX] =
{ { LOCK_INITIALIZER, &__pthread_initial_thread, 0},
  { LOCK_INITIALIZER, &__pthread_manager_thread, 0}, /* All NULLs */ };

/* For debugging purposes put the maximum number of threads in a variable. */
const int __linuxthreads_pthread_threads_max = PTHREAD_THREADS_MAX;

/* Indicate whether at least one thread has a user-defined stack (if 1),
   or if all threads have stacks supplied by LinuxThreads (if 0). */
int __pthread_nonstandard_stacks;

/* Number of active entries in __pthread_handles (used by gdb) */
volatile int __pthread_handles_num = 2;
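
/* The initial value 2 accounts for the two slots pre-assigned in the
   initializer above: entry 0 is the initial (main) thread and entry 1
   is the manager thread.  */
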
/* Whether to use debugger additional actions for thread creation
   (set to 1 by gdb) */
volatile int __pthread_threads_debug;

/* Globally enabled events.  */
volatile td_thr_events_t __pthread_threads_events;

/* Pointer to thread descriptor with last event. */
volatile pthread_descr __pthread_last_event;

/* Mapping from stack segment to thread descriptor. */
/* Stack segment numbers are also indices into the __pthread_handles array. */
/* Stack segment number 0 is reserved for the initial thread. */

static inline pthread_descr thread_segment(int seg)
{
  return (pthread_descr)(THREAD_STACK_START_ADDRESS - (seg - 1) * STACK_SIZE)
         - 1;
}
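
/* Example: thread_segment(2), the segment of the first dynamically
   created thread, yields a descriptor ending at
   THREAD_STACK_START_ADDRESS - STACK_SIZE; each subsequent segment
   lies one STACK_SIZE lower.  The descriptor occupies the top of its
   segment and the stack grows down beneath it, so segment number,
   descriptor address and stack location are all recoverable from one
   another by simple address arithmetic.  */
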
/* Flag set in signal handler to record child termination */

static volatile int terminated_children = 0;

/* Flag set when the initial thread is blocked on pthread_exit waiting
   for all other threads to terminate */

static int main_thread_exiting = 0;

/* Counter used to generate unique thread identifier.
   Thread identifier is pthread_threads_counter + segment. */

static pthread_t pthread_threads_counter = 0;
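
/* The counter is bumped by PTHREAD_THREADS_MAX on each creation (see
   pthread_handle_create below), so (tid % PTHREAD_THREADS_MAX)
   recovers the segment, and thus the __pthread_handles slot, while the
   high part distinguishes successive threads that reuse a slot.  */
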
#ifdef NEED_SEPARATE_REGISTER_STACK
/* Signal masks for the manager.  These have to be global only when clone2
   is used, since clone2 is currently broken wrt signals in the child. */
static sigset_t manager_mask;           /* Manager normal signal mask */
static sigset_t manager_mask_all;       /* All bits set. */
#endif

/* Forward declarations */

static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
                                 void * (*start_routine)(void *), void *arg,
                                 sigset_t *mask, int father_pid,
                                 int report_events,
                                 td_thr_events_t *event_maskp);
static void pthread_handle_free(pthread_t th_id);
static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode);
static void pthread_reap_children(void);
static void pthread_kill_all_threads(int sig, int main_thread_also);
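
/* Requests are written to the __pthread_manager_request pipe by other
   threads and read back in __pthread_manager's server loop below.  As
   a rough sketch (see pthread_create in pthread.c for the real client
   side), a creating thread does essentially:

     struct pthread_request request;
     request.req_thread = thread_self();
     request.req_kind = REQ_CREATE;
     request.req_args.create.attr = attr;
     request.req_args.create.fn = start_routine;
     request.req_args.create.arg = arg;
     sigprocmask(SIG_SETMASK, NULL, &request.req_args.create.mask);
     __libc_write(__pthread_manager_request,
                  (char *) &request, sizeof(request));
     suspend(thread_self());   -- restarted by the manager

   and then picks up the p_retcode/p_retval that the manager filled in.  */
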
/* The server thread managing requests for thread creation and termination */

int __pthread_manager(void *arg)
{
  int reqfd = (int) (long int) arg;
  struct pollfd ufd;
#ifndef NEED_SEPARATE_REGISTER_STACK
  sigset_t manager_mask;
#endif
  int n;
  struct pthread_request request;

  /* If we have special thread_self processing, initialize it. */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(&__pthread_manager_thread, 1);
#endif
  /* Set the error variable. */
  __pthread_manager_thread.p_errnop = &__pthread_manager_thread.p_errno;
  __pthread_manager_thread.p_h_errnop = &__pthread_manager_thread.p_h_errno;
  /* Block all signals except __pthread_sig_cancel and SIGTRAP */
  sigfillset(&manager_mask);
  sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
  sigdelset(&manager_mask, SIGTRAP);              /* for debugging purposes */
  if (__pthread_threads_debug && __pthread_sig_debug > 0)
    sigdelset(&manager_mask, __pthread_sig_debug);
  sigprocmask(SIG_SETMASK, &manager_mask, NULL);
#ifdef NEED_SEPARATE_REGISTER_STACK
  sigfillset(&manager_mask_all);
#endif
  /* Raise our priority to match that of main thread */
  __pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
  /* Synchronize debugging of the thread manager */
  n = __libc_read(reqfd, (char *)&request, sizeof(request));
  ASSERT(n == sizeof(request) && request.req_kind == REQ_DEBUG);
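  /* This first request should be the REQ_DEBUG written by
     __pthread_initialize_manager (in pthread.c) right after spawning
     us; consuming it here keeps manager startup and debugger
     attachment in lockstep.  */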
  ufd.fd = reqfd;
  ufd.events = POLLIN;
  /* Enter server loop */
  while(1) {
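    /* Poll with a 2 second timeout: even with no incoming requests the
       manager wakes up periodically to notice an orphaned process (the
       getppid() test below) and to reap terminated children.  */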
    n = __poll(&ufd, 1, 2000);

    /* Check for termination of the main thread */
    if (getppid() == 1) {
      pthread_kill_all_threads(SIGKILL, 0);
      _exit(0);
    }
    /* Check for dead children */
    if (terminated_children) {
      terminated_children = 0;
      pthread_reap_children();
    }
    /* Read and execute request */
    if (n == 1 && (ufd.revents & POLLIN)) {
      n = __libc_read(reqfd, (char *)&request, sizeof(request));
      ASSERT(n == sizeof(request));
      switch(request.req_kind) {
      case REQ_CREATE:
        request.req_thread->p_retcode =
          pthread_handle_create((pthread_t *) &request.req_thread->p_retval,
                                request.req_args.create.attr,
                                request.req_args.create.fn,
                                request.req_args.create.arg,
                                &request.req_args.create.mask,
                                request.req_thread->p_pid,
                                request.req_thread->p_report_events,
                                &request.req_thread->p_eventbuf.eventmask);
        restart(request.req_thread);
        break;
      case REQ_FREE:
        pthread_handle_free(request.req_args.free.thread_id);
        break;
      case REQ_PROCESS_EXIT:
        pthread_handle_exit(request.req_thread,
                            request.req_args.exit.code);
        /* NOTREACHED */
        break;
      case REQ_MAIN_THREAD_EXIT:
        main_thread_exiting = 1;
        /* Reap children in case all other threads died and the signal handler
           went off before we set main_thread_exiting to 1, and therefore did
           not do REQ_KICK. */
        pthread_reap_children();

        if (__pthread_main_thread->p_nextlive == __pthread_main_thread) {
          restart(__pthread_main_thread);
          /* The main thread will now call exit() which will trigger an
             __on_exit handler, which in turn will send REQ_PROCESS_EXIT
             to the thread manager.  That, in case you are wondering, is
             how the manager eventually gets out of this loop. */
        }
        break;
      case REQ_POST:
        __new_sem_post(request.req_args.post);
        break;
      case REQ_DEBUG:
        /* Make gdb aware of the new thread; gdb will restart the
           thread once it is ready to handle it. */
        if (__pthread_threads_debug && __pthread_sig_debug > 0)
          raise(__pthread_sig_debug);
        break;
      case REQ_KICK:
        /* This is just a prod to get the manager to reap some
           threads right away, avoiding a potential delay at shutdown. */
        break;
      }
    }
  }
}

int __pthread_manager_event(void *arg)
{
  /* If we have special thread_self processing, initialize it. */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(&__pthread_manager_thread, 1);
#endif

  /* Get the lock the manager will free once all is correctly set up. */
  __pthread_lock (THREAD_GETMEM((&__pthread_manager_thread), p_lock), NULL);
  /* Free it immediately. */
  __pthread_unlock (THREAD_GETMEM((&__pthread_manager_thread), p_lock));

  return __pthread_manager(arg);
}

/* Process creation */

static int pthread_start_thread(void *arg)
{
  pthread_descr self = (pthread_descr) arg;
  struct pthread_request request;
  void * outcome;
  /* Initialize special thread_self processing, if any. */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(self, self->p_nr);
#endif
  /* Make sure our pid field is initialized, just in case we get there
     before our father has initialized it. */
  THREAD_SETMEM(self, p_pid, __getpid());
  /* Initial signal mask is that of the creating thread. (Otherwise,
     we'd just inherit the mask of the thread manager.) */
  sigprocmask(SIG_SETMASK, &self->p_start_args.mask, NULL);
  /* Set the scheduling policy and priority for the new thread, if needed */
  if (THREAD_GETMEM(self, p_start_args.schedpolicy) >= 0)
    /* Explicit scheduling attributes were provided: apply them */
    __sched_setscheduler(THREAD_GETMEM(self, p_pid),
                         THREAD_GETMEM(self, p_start_args.schedpolicy),
                         &self->p_start_args.schedparam);
  else if (__pthread_manager_thread.p_priority > 0)
    /* Default scheduling required, but thread manager runs in realtime
       scheduling: switch new thread to SCHED_OTHER policy */
    {
      struct sched_param default_params;
      default_params.sched_priority = 0;
      __sched_setscheduler(THREAD_GETMEM(self, p_pid),
                           SCHED_OTHER, &default_params);
    }
  /* Make gdb aware of new thread */
  if (__pthread_threads_debug && __pthread_sig_debug > 0) {
    request.req_thread = self;
    request.req_kind = REQ_DEBUG;
    __libc_write(__pthread_manager_request,
                 (char *) &request, sizeof(request));
    suspend(self);
  }
  /* Run the thread code */
  outcome = self->p_start_args.start_routine(THREAD_GETMEM(self,
                                                           p_start_args.arg));
  /* Exit with the given return value */
  pthread_exit(outcome);
  return 0;
}

static int pthread_start_thread_event(void *arg)
{
  pthread_descr self = (pthread_descr) arg;

#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(self, self->p_nr);
#endif
  /* Make sure our pid field is initialized, just in case we get there
     before our father has initialized it. */
  THREAD_SETMEM(self, p_pid, __getpid());
  /* Get the lock the manager will free once all is correctly set up. */
  __pthread_lock (THREAD_GETMEM(self, p_lock), NULL);
  /* Free it immediately. */
  __pthread_unlock (THREAD_GETMEM(self, p_lock));

  /* Continue with the real function. */
  return pthread_start_thread (arg);
}

static int pthread_allocate_stack(const pthread_attr_t *attr,
                                  pthread_descr default_new_thread,
                                  int pagesize,
                                  pthread_descr * out_new_thread,
                                  char ** out_new_thread_bottom,
                                  char ** out_guardaddr,
                                  size_t * out_guardsize)
{
  pthread_descr new_thread;
  char * new_thread_bottom;
  char * guardaddr;
  size_t stacksize, guardsize;

  if (attr != NULL && attr->__stackaddr_set)
    {
      /* The user provided a stack.  For now we interpret the supplied
         address as 1 + the highest addr. in the stack segment.  If a
         separate register stack is needed, we place it at the low end
         of the segment, relying on the associated stacksize to
         determine the low end of the segment.  This differs from many
         (but not all) other pthreads implementations.  The intent is
         that on machines with a single stack growing toward higher
         addresses, stackaddr would be the lowest address in the stack
         segment, so that it is consistently close to the initial sp
         value. */
      new_thread =
        (pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
      new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
      guardaddr = NULL;
      guardsize = 0;
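      /* Note that no guard page is set up for a user-supplied stack:
         the library does not own that memory and must not map over it. */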
      __pthread_nonstandard_stacks = 1;
    }
  else
    {
#ifdef NEED_SEPARATE_REGISTER_STACK
      size_t granularity = 2 * pagesize;
      /* Try to make stacksize/2 a multiple of pagesize */
#else
      size_t granularity = pagesize;
#endif
      /* Allocate space for stack and thread descriptor at default address */
      if (attr != NULL)
        {
          guardsize = page_roundup (attr->__guardsize, granularity);
          stacksize = STACK_SIZE - guardsize;
          stacksize = MIN (stacksize,
                           page_roundup (attr->__stacksize, granularity));
        }
      else
        {
          guardsize = granularity;
          stacksize = STACK_SIZE - granularity;
        }
      new_thread = default_new_thread;
#ifdef NEED_SEPARATE_REGISTER_STACK
      new_thread_bottom = (char *) (new_thread + 1) - stacksize - guardsize;
      /* Includes guard area, unlike the normal case.  Use the bottom
         end of the segment as backing store for the register stack.
         Needed on IA64.  In this case, we also map the entire stack at
         once.  According to David Mosberger, that's cheaper.  It also
         avoids the risk of intermittent failures due to other mappings
         in the same region.  The cost is that we might be able to map
         slightly fewer stacks. */

      /* First the main stack: */
      if (mmap((caddr_t)((char *)(new_thread + 1) - stacksize / 2),
               stacksize / 2, PROT_READ | PROT_WRITE | PROT_EXEC,
               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
          == MAP_FAILED)
        /* Bad luck, this segment is already mapped. */
        return -1;
      /* Then the register stack: */
      if (mmap((caddr_t)new_thread_bottom, stacksize/2,
               PROT_READ | PROT_WRITE | PROT_EXEC,
               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
          == MAP_FAILED)
        {
          munmap((caddr_t)((char *)(new_thread + 1) - stacksize/2),
                 stacksize/2);
          return -1;
        }

      guardaddr = new_thread_bottom + stacksize/2;
      /* We leave the guard area in the middle unmapped. */
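      /* Resulting segment layout (high addresses at the top):
             +------------------------------+ <- (char *)(new_thread + 1)
             |  main stack (grows down)     |
             +------------------------------+ <- guardaddr + guardsize
             |  guard area (left unmapped)  |
             +------------------------------+ <- guardaddr
             |  register stack (grows up)   |
             +------------------------------+ <- new_thread_bottom   */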
#else /* !NEED_SEPARATE_REGISTER_STACK */
      new_thread_bottom = (char *) (new_thread + 1) - stacksize;
      if (mmap((caddr_t)((char *)(new_thread + 1) - INITIAL_STACK_SIZE),
               INITIAL_STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_GROWSDOWN,
               -1, 0) == MAP_FAILED)
        /* Bad luck, this segment is already mapped. */
        return -1;
      /* We managed to get a stack.  Now see whether we need a guard
         and allocate it if necessary.  Notice that the default
         attributes (stack_size = STACK_SIZE - pagesize and guardsize
         = pagesize) do not need a guard page, since the RLIMIT_STACK
         soft limit prevents stacks from running into one another. */
      if (stacksize == STACK_SIZE - pagesize)
        {
          /* We don't need a guard page. */
          guardaddr = NULL;
          guardsize = 0;
        }
      else
        {
          /* Put a bad page at the bottom of the stack */
          guardaddr = (void *)new_thread_bottom - guardsize;
          if (mmap ((caddr_t) guardaddr, guardsize, 0, MAP_FIXED, -1, 0)
              == MAP_FAILED)
            {
              /* We don't make this an error. */
              guardaddr = NULL;
              guardsize = 0;
            }
        }
#endif /* !NEED_SEPARATE_REGISTER_STACK */
    }
  /* Clear the thread data structure. */
  memset (new_thread, '\0', sizeof (*new_thread));
  *out_new_thread = new_thread;
  *out_new_thread_bottom = new_thread_bottom;
  *out_guardaddr = guardaddr;
  *out_guardsize = guardsize;
  return 0;
}
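
/* In the default (single stack) case, the segment handed back is laid
   out, from high to low addresses, as: the thread descriptor
   (*out_new_thread) at the very top, the stack growing down beneath it
   toward *out_new_thread_bottom, and, when present, the guard page at
   *out_guardaddr just below the stack bottom.  */
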
static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
                                 void * (*start_routine)(void *), void *arg,
                                 sigset_t * mask, int father_pid,
                                 int report_events,
                                 td_thr_events_t *event_maskp)
{
  size_t sseg;
  int pid;
  pthread_descr new_thread;
  char * new_thread_bottom;
  pthread_t new_thread_id;
  char *guardaddr = NULL;
  size_t guardsize = 0;
  int pagesize = __getpagesize();

  /* First check whether we have to change the policy and if yes, whether
     we can do this.  Normally this should be done by examining the
     return value of the __sched_setscheduler call in pthread_start_thread
     but this is hard to implement.  FIXME */
  if (attr != NULL && attr->__schedpolicy != SCHED_OTHER && geteuid () != 0)
    return EPERM;
  /* Find a free segment for the thread, and allocate a stack if needed */
  for (sseg = 2; ; sseg++)
    {
      if (sseg >= PTHREAD_THREADS_MAX)
        return EAGAIN;
      if (__pthread_handles[sseg].h_descr != NULL)
        continue;
      if (pthread_allocate_stack(attr, thread_segment(sseg), pagesize,
                                 &new_thread, &new_thread_bottom,
                                 &guardaddr, &guardsize) == 0)
        break;
    }
  __pthread_handles_num++;
  /* Allocate new thread identifier */
  pthread_threads_counter += PTHREAD_THREADS_MAX;
  new_thread_id = sseg + pthread_threads_counter;
  /* Initialize the thread descriptor.  Elements which have to be
     initialized to zero already have this value. */
  new_thread->p_tid = new_thread_id;
  new_thread->p_lock = &(__pthread_handles[sseg].h_lock);
  new_thread->p_cancelstate = PTHREAD_CANCEL_ENABLE;
  new_thread->p_canceltype = PTHREAD_CANCEL_DEFERRED;
  new_thread->p_errnop = &new_thread->p_errno;
  new_thread->p_h_errnop = &new_thread->p_h_errno;
  new_thread->p_resp = &new_thread->p_res;
  new_thread->p_guardaddr = guardaddr;
  new_thread->p_guardsize = guardsize;
  new_thread->p_header.data.self = new_thread;
  new_thread->p_nr = sseg;
  /* Initialize the thread handle */
  __pthread_init_lock(&__pthread_handles[sseg].h_lock);
  __pthread_handles[sseg].h_descr = new_thread;
  __pthread_handles[sseg].h_bottom = new_thread_bottom;
  /* Determine scheduling parameters for the thread */
  new_thread->p_start_args.schedpolicy = -1;
  if (attr != NULL) {
    new_thread->p_detached = attr->__detachstate;
    new_thread->p_userstack = attr->__stackaddr_set;

    switch(attr->__inheritsched) {
    case PTHREAD_EXPLICIT_SCHED:
      new_thread->p_start_args.schedpolicy = attr->__schedpolicy;
      memcpy (&new_thread->p_start_args.schedparam, &attr->__schedparam,
              sizeof (struct sched_param));
      break;
    case PTHREAD_INHERIT_SCHED:
      new_thread->p_start_args.schedpolicy = __sched_getscheduler(father_pid);
      __sched_getparam(father_pid, &new_thread->p_start_args.schedparam);
      break;
    }
    new_thread->p_priority =
      new_thread->p_start_args.schedparam.sched_priority;
  }
  /* Finish setting up arguments to pthread_start_thread */
  new_thread->p_start_args.start_routine = start_routine;
  new_thread->p_start_args.arg = arg;
  new_thread->p_start_args.mask = *mask;
  /* Make the new thread ID available already now.  If any of the later
     functions fail we return an error value and the caller must not use
     the stored thread ID. */
  *thread = new_thread_id;
  /* Raise priority of thread manager if needed */
  __pthread_manager_adjust_prio(new_thread->p_priority);
  /* Do the cloning.  We have to use two different functions depending
     on whether we are debugging or not. */
  pid = 0;      /* Note that the thread never can have PID zero. */
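  /* pid keeps the value 0 unless the event-reporting path below
     actually clones the thread; the "if (pid == 0)" test further down
     then falls through to the normal clone. */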
  if (report_events)
    {
      /* See whether the TD_CREATE event bit is set in any of the
         masks. */
      int idx = __td_eventword (TD_CREATE);
      uint32_t mask = __td_eventmask (TD_CREATE);

      if ((mask & (__pthread_threads_events.event_bits[idx]
                   | event_maskp->event_bits[idx])) != 0)
        {
          /* Lock the mutex the child will use now so that it will stop. */
          __pthread_lock(new_thread->p_lock, NULL);

          /* We have to report this event. */
#ifdef NEED_SEPARATE_REGISTER_STACK
          /* Perhaps this version should be used on all platforms.  But
             this requires that __clone2 be uniformly supported
             everywhere.

             And there is some argument for changing the __clone2
             interface to pass sp and bsp instead, making it more IA64
             specific, but allowing stacks to grow outward from each
             other, to get less paging and fewer mmaps.  Clone2
             currently can't take signals in the child right after
             process creation.  Mask them in the child.  It resets the
             mask once it starts up. */
          sigprocmask(SIG_SETMASK, &manager_mask_all, NULL);
          pid = __clone2(pthread_start_thread_event,
                         (void **)new_thread_bottom,
                         (char *)new_thread - new_thread_bottom,
                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                         __pthread_sig_cancel, new_thread);
          sigprocmask(SIG_SETMASK, &manager_mask, NULL);
#else
          pid = __clone(pthread_start_thread_event, (void **) new_thread,
                        CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                        __pthread_sig_cancel, new_thread);
#endif
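          /* The clone flags used above: CLONE_VM shares the address
             space, CLONE_FS the filesystem context, CLONE_FILES the
             descriptor table and CLONE_SIGHAND the signal handlers;
             the low bits carry __pthread_sig_cancel, the signal
             delivered to the manager when the child terminates. */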
          if (pid != -1)
            {
              /* Now fill in the information about the new thread in
                 the newly created thread's data structure.  We cannot let
                 the new thread do this since we don't know whether it was
                 already scheduled when we send the event. */
              new_thread->p_eventbuf.eventdata = new_thread;
              new_thread->p_eventbuf.eventnum = TD_CREATE;
              __pthread_last_event = new_thread;

              /* We have to set the PID here since the callback function
                 in the debug library will need it and we cannot guarantee
                 the child got scheduled before the debugger. */
              new_thread->p_pid = pid;

              /* Now call the function which signals the event. */
              __linuxthreads_create_event ();

              /* Now restart the thread. */
              __pthread_unlock(new_thread->p_lock);
            }
        }
    }
  if (pid == 0)
    {
#ifdef NEED_SEPARATE_REGISTER_STACK
      sigprocmask(SIG_SETMASK, &manager_mask_all, NULL);
      pid = __clone2(pthread_start_thread,
                     (void **)new_thread_bottom,
                     (char *)new_thread - new_thread_bottom,
                     CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                     __pthread_sig_cancel, new_thread);
      sigprocmask(SIG_SETMASK, &manager_mask, NULL);
#else
      pid = __clone(pthread_start_thread, (void **) new_thread,
                    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                    __pthread_sig_cancel, new_thread);
#endif /* !NEED_SEPARATE_REGISTER_STACK */
    }
  /* Check if cloning succeeded */
  if (pid == -1) {
    /* Free the stack if we allocated it */
    if (attr == NULL || !attr->__stackaddr_set)
      {
#ifdef NEED_SEPARATE_REGISTER_STACK
        size_t stacksize = ((char *)(new_thread->p_guardaddr)
                            - new_thread_bottom);
        munmap((caddr_t)new_thread_bottom, stacksize);
        munmap((caddr_t)new_thread_bottom + stacksize
               + new_thread->p_guardsize, stacksize);
#else
        if (new_thread->p_guardsize != 0)
          munmap(new_thread->p_guardaddr, new_thread->p_guardsize);
        munmap((caddr_t)((char *)(new_thread+1) - INITIAL_STACK_SIZE),
               INITIAL_STACK_SIZE);
#endif
      }
    __pthread_handles[sseg].h_descr = NULL;
    __pthread_handles[sseg].h_bottom = NULL;
    __pthread_handles_num--;
    return errno;
  }
  /* Insert new thread in doubly linked list of active threads */
  new_thread->p_prevlive = __pthread_main_thread;
  new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
  __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
  __pthread_main_thread->p_nextlive = new_thread;
  /* Set pid field of the new thread, in case we get there before the
     child starts. */
  new_thread->p_pid = pid;
  return 0;
}

/* Try to free the resources of a thread when requested by pthread_join
   or pthread_detach on a terminated thread. */

static void pthread_free(pthread_descr th)
{
  pthread_handle handle;
  pthread_readlock_info *iter, *next;

  ASSERT(th->p_exited);
  /* Make the handle invalid */
  handle = thread_handle(th->p_tid);
  __pthread_lock(&handle->h_lock, NULL);
  handle->h_descr = NULL;
  handle->h_bottom = (char *)(-1L);
  __pthread_unlock(&handle->h_lock);
#ifdef FREE_THREAD
  FREE_THREAD(th, th->p_nr);
#endif
  /* One fewer threads in __pthread_handles */
  __pthread_handles_num--;

  /* Destroy read lock list, and list of free read lock structures.
     If the former is not empty, it means the thread exited while
     holding read locks! */

  for (iter = th->p_readlock_list; iter != NULL; iter = next)
    {
      next = iter->pr_next;
      free(iter);
    }

  for (iter = th->p_readlock_free; iter != NULL; iter = next)
    {
      next = iter->pr_next;
      free(iter);
    }

  /* If initial thread, nothing to free */
  if (th == &__pthread_initial_thread) return;
  if (!th->p_userstack)
    {
      size_t guardsize = th->p_guardsize;
      /* Free the stack and thread descriptor area */
#ifdef NEED_SEPARATE_REGISTER_STACK
      char *guardaddr = th->p_guardaddr;
      /* We unmap exactly what we mapped, in case there was something
         else in the same region.  Guardaddr is always set, even if
         guardsize is 0.  This allows us to compute everything else. */
      size_t stacksize = (char *)(th+1) - guardaddr - guardsize;
      /* Unmap the register stack, which is below guardaddr. */
      munmap((caddr_t)(guardaddr-stacksize), stacksize);
      /* Unmap the main stack. */
      munmap((caddr_t)(guardaddr+guardsize), stacksize);
#else
      /* The following assumes that we only allocate stacks of one
         size.  That's currently true but probably shouldn't be.  This
         looks like it fails for growing stacks if there was something
         else mapped just below the stack? */
      if (guardsize != 0)
        munmap(th->p_guardaddr, guardsize);
      munmap((caddr_t) ((char *)(th+1) - STACK_SIZE), STACK_SIZE);
#endif
    }
}

/* Handle threads that have exited */

static void pthread_exited(pid_t pid)
{
  pthread_descr th;
  int detached;
  /* Find thread with that pid */
  for (th = __pthread_main_thread->p_nextlive;
       th != __pthread_main_thread;
       th = th->p_nextlive) {
    if (th->p_pid == pid) {
      /* Remove thread from list of active threads */
      th->p_nextlive->p_prevlive = th->p_prevlive;
      th->p_prevlive->p_nextlive = th->p_nextlive;
      /* Mark thread as exited, and if detached, free its resources */
      __pthread_lock(th->p_lock, NULL);
      th->p_exited = 1;
      /* If we have to signal this event do it now. */
      if (th->p_report_events)
        {
          /* See whether TD_DEATH is in any of the masks. */
          int idx = __td_eventword (TD_REAP);
          uint32_t mask = __td_eventmask (TD_REAP);

          if ((mask & (__pthread_threads_events.event_bits[idx]
                       | th->p_eventbuf.eventmask.event_bits[idx])) != 0)
            {
              /* Yep, we have to signal the death. */
              th->p_eventbuf.eventnum = TD_DEATH;
              th->p_eventbuf.eventdata = th;
              __pthread_last_event = th;

              /* Now call the function to signal the event. */
              __linuxthreads_reap_event();
            }
        }
      detached = th->p_detached;
      __pthread_unlock(th->p_lock);
      if (detached)
        pthread_free(th);
      break;
    }
  }
  /* If all threads have exited and the main thread is pending on a
     pthread_exit, wake up the main thread and terminate ourselves. */
  if (main_thread_exiting &&
      __pthread_main_thread->p_nextlive == __pthread_main_thread) {
    restart(__pthread_main_thread);
    /* Same logic as REQ_MAIN_THREAD_EXIT. */
  }
}

static void pthread_reap_children(void)
{
  pid_t pid;
  int status;
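
  /* __WCLONE restricts the wait to clone children that do not post
     SIGCHLD on exit, i.e. the threads created above with
     __pthread_sig_cancel as their termination signal; WNOHANG keeps
     the manager from blocking once all pending exits are collected. */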
  while ((pid = __libc_waitpid(-1, &status, WNOHANG | __WCLONE)) > 0) {
    pthread_exited(pid);
    if (WIFSIGNALED(status)) {
      /* If a thread died due to a signal, send the same signal to
         all other threads, including the main thread. */
      pthread_kill_all_threads(WTERMSIG(status), 1);
      _exit(0);
    }
  }
}

/* Try to free the resources of a thread when requested by pthread_join
   or pthread_detach on a terminated thread. */

static void pthread_handle_free(pthread_t th_id)
{
  pthread_handle handle = thread_handle(th_id);
  pthread_descr th;

  __pthread_lock(&handle->h_lock, NULL);
  if (nonexisting_handle(handle, th_id)) {
    /* pthread_reap_children has deallocated the thread already,
       nothing needs to be done */
    __pthread_unlock(&handle->h_lock);
    return;
  }
  th = handle->h_descr;
  if (th->p_exited) {
    __pthread_unlock(&handle->h_lock);
    pthread_free(th);
  } else {
    /* The Unix process of the thread is still running.
       Mark the thread as detached so that the thread manager will
       deallocate its resources when the Unix process exits. */
    th->p_detached = 1;
    __pthread_unlock(&handle->h_lock);
  }
}

/* Send a signal to all running threads */

static void pthread_kill_all_threads(int sig, int main_thread_also)
{
  pthread_descr th;
  for (th = __pthread_main_thread->p_nextlive;
       th != __pthread_main_thread;
       th = th->p_nextlive) {
    kill(th->p_pid, sig);
  }
  if (main_thread_also) {
    kill(__pthread_main_thread->p_pid, sig);
  }
}

/* Process-wide exit() */

static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
{
  pthread_descr th;
  __pthread_exit_requested = 1;
  __pthread_exit_code = exitcode;
  /* Send the CANCEL signal to all running threads, including the main
     thread, but excluding the thread from which the exit request originated
     (that thread must complete the exit, e.g. calling atexit functions
     and flushing stdio buffers). */
  for (th = issuing_thread->p_nextlive;
       th != issuing_thread;
       th = th->p_nextlive) {
    kill(th->p_pid, __pthread_sig_cancel);
  }
  /* Now, wait for all these threads, so that they don't become zombies
     and their times are properly added to the thread manager's times. */
  for (th = issuing_thread->p_nextlive;
       th != issuing_thread;
       th = th->p_nextlive) {
    waitpid(th->p_pid, NULL, __WCLONE);
  }
  restart(issuing_thread);
  _exit(0);
}

/* Handler for __pthread_sig_cancel in the thread manager thread */

void __pthread_manager_sighandler(int sig)
{
  int kick_manager = terminated_children == 0 && main_thread_exiting;
  terminated_children = 1;

  /* If the main thread is terminating, kick the thread manager loop
     each time some threads terminate.  This eliminates a two second
     shutdown delay caused by the thread manager sleeping in the
     call to __poll().  Instead, the thread manager is kicked into
     action, reaps the outstanding threads and resumes the main thread
     so that it can complete the shutdown. */

  if (kick_manager) {
    struct pthread_request request;
    request.req_thread = 0;
    request.req_kind = REQ_KICK;
    __libc_write(__pthread_manager_request, (char *) &request,
                 sizeof(request));
  }
}

/* Adjust the priority of the thread manager so that it always runs at a
   priority higher than all threads */

void __pthread_manager_adjust_prio(int thread_prio)
{
  struct sched_param param;

  if (thread_prio <= __pthread_manager_thread.p_priority) return;
  param.sched_priority =
    thread_prio < __sched_get_priority_max(SCHED_FIFO)
    ? thread_prio + 1 : thread_prio;
  __sched_setscheduler(__pthread_manager_thread.p_pid, SCHED_FIFO, &param);
  __pthread_manager_thread.p_priority = thread_prio;
}