[glibc/pb-stable.git] / linuxthreads / manager.c
/* Linuxthreads - a simple clone()-based implementation of Posix        */
/* threads for Linux.                                                   */
/* Copyright (C) 1996 Xavier Leroy (Xavier.Leroy@inria.fr)              */
/*                                                                      */
/* This program is free software; you can redistribute it and/or        */
/* modify it under the terms of the GNU Library General Public License  */
/* as published by the Free Software Foundation; either version 2       */
/* of the License, or (at your option) any later version.               */
/*                                                                      */
/* This program is distributed in the hope that it will be useful,      */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
/* GNU Library General Public License for more details.                 */

/* The "thread manager" thread: manages creation and termination of threads */
#include <errno.h>
#include <sched.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/poll.h>           /* for poll */
#include <sys/mman.h>           /* for mmap */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/wait.h>           /* for waitpid macros */

#include "pthread.h"
#include "internals.h"
#include "spinlock.h"
#include "restart.h"
#include "semaphore.h"
/* Array of active threads. Entry 0 is reserved for the initial thread. */
struct pthread_handle_struct __pthread_handles[PTHREAD_THREADS_MAX] =
{ { __LOCK_INITIALIZER, &__pthread_initial_thread, 0},
  { __LOCK_INITIALIZER, &__pthread_manager_thread, 0}, /* All NULLs */ };

/* For debugging purposes put the maximum number of threads in a variable.  */
const int __linuxthreads_pthread_threads_max = PTHREAD_THREADS_MAX;

#ifndef THREAD_SELF
/* Indicate whether at least one thread has a user-defined stack (if 1),
   or if all threads have stacks supplied by LinuxThreads (if 0). */
int __pthread_nonstandard_stacks;
#endif

/* Number of active entries in __pthread_handles (used by gdb) */
volatile int __pthread_handles_num = 2;

/* Whether to use additional debugger actions for thread creation
   (set to 1 by gdb) */
volatile int __pthread_threads_debug;

/* Globally enabled events.  */
volatile td_thr_events_t __pthread_threads_events;

/* Pointer to thread descriptor with last event.  */
volatile pthread_descr __pthread_last_event;
/* Mapping from stack segment to thread descriptor. */
/* Stack segment numbers are also indices into the __pthread_handles array. */
/* Stack segment number 0 is reserved for the initial thread. */

#if FLOATING_STACKS
# define thread_segment(seq) NULL
#else
static inline pthread_descr thread_segment(int seg)
{
  return (pthread_descr)(THREAD_STACK_START_ADDRESS - (seg - 1) * STACK_SIZE)
         - 1;
}
#endif
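/* Worked example (illustrative, with assumed constants, not part of the
   original source): on a platform where THREAD_STACK_START_ADDRESS is
   0x40000000 and STACK_SIZE is 2MB (0x200000), segment 2 maps to

     (pthread_descr) (0x40000000 - 1 * 0x200000) - 1

   i.e. the thread descriptor sits just below the top of the fixed 2MB
   segment holding that thread's stack, and successive segments are laid
   out downwards in the address space. */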
/* Flag set in signal handler to record child termination */

static volatile int terminated_children;

/* Flag set when the initial thread is blocked on pthread_exit waiting
   for all other threads to terminate */

static int main_thread_exiting;

/* Counter used to generate unique thread identifier.
   Thread identifier is pthread_threads_counter + segment. */

static pthread_t pthread_threads_counter;
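/* Illustrative sketch (an assumption about internals.h, not code from this
   file): because the counter advances in multiples of PTHREAD_THREADS_MAX
   and the segment number is added in, a thread id can be mapped back to
   its handle slot by reduction modulo PTHREAD_THREADS_MAX, along the
   lines of:

     #define thread_handle(id) \
       (&__pthread_handles[(unsigned long int)(id) % PTHREAD_THREADS_MAX])

   Reusing a slot bumps the counter part of the id, so a stale id for that
   slot no longer compares equal to the live thread's p_tid. */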
/* Forward declarations */

static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
                                 void * (*start_routine)(void *), void *arg,
                                 sigset_t *mask, int father_pid,
                                 int report_events,
                                 td_thr_events_t *event_maskp);
static void pthread_handle_free(pthread_t th_id);
static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode);
static void pthread_reap_children(void);
static void pthread_kill_all_threads(int sig, int main_thread_also);
/* The server thread managing requests for thread creation and termination */

int __pthread_manager(void *arg)
{
  int reqfd = (int) (long int) arg;
  struct pollfd ufd;
  sigset_t manager_mask;
  int n;
  struct pthread_request request;

  /* If we have special thread_self processing, initialize it.  */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(&__pthread_manager_thread, 1);
#endif
  /* Set the error variable.  */
  __pthread_manager_thread.p_errnop = &__pthread_manager_thread.p_errno;
  __pthread_manager_thread.p_h_errnop = &__pthread_manager_thread.p_h_errno;
  /* Block all signals except __pthread_sig_cancel and SIGTRAP */
  sigfillset(&manager_mask);
  sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
  sigdelset(&manager_mask, SIGTRAP);              /* for debugging purposes */
  if (__pthread_threads_debug && __pthread_sig_debug > 0)
    sigdelset(&manager_mask, __pthread_sig_debug);
  sigprocmask(SIG_SETMASK, &manager_mask, NULL);
  /* Raise our priority to match that of main thread */
  __pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
  /* Synchronize debugging of the thread manager */
  n = __libc_read(reqfd, (char *)&request, sizeof(request));
  ASSERT(n == sizeof(request) && request.req_kind == REQ_DEBUG);
  ufd.fd = reqfd;
  ufd.events = POLLIN;
  /* Enter server loop */
  while(1) {
    n = __poll(&ufd, 1, 2000);

    /* Check for termination of the main thread */
    if (getppid() == 1) {
      pthread_kill_all_threads(SIGKILL, 0);
      _exit(0);
    }
    /* Check for dead children */
    if (terminated_children) {
      terminated_children = 0;
      pthread_reap_children();
    }
    /* Read and execute request */
    if (n == 1 && (ufd.revents & POLLIN)) {
      n = __libc_read(reqfd, (char *)&request, sizeof(request));
      ASSERT(n == sizeof(request));
      switch(request.req_kind) {
      case REQ_CREATE:
        request.req_thread->p_retcode =
          pthread_handle_create((pthread_t *) &request.req_thread->p_retval,
                                request.req_args.create.attr,
                                request.req_args.create.fn,
                                request.req_args.create.arg,
                                &request.req_args.create.mask,
                                request.req_thread->p_pid,
                                request.req_thread->p_report_events,
                                &request.req_thread->p_eventbuf.eventmask);
        restart(request.req_thread);
        break;
      case REQ_FREE:
        pthread_handle_free(request.req_args.free.thread_id);
        break;
      case REQ_PROCESS_EXIT:
        pthread_handle_exit(request.req_thread,
                            request.req_args.exit.code);
        /* NOTREACHED */
        break;
      case REQ_MAIN_THREAD_EXIT:
        main_thread_exiting = 1;
        /* Reap children in case all other threads died and the signal
           handler went off before we set main_thread_exiting to 1, and
           therefore did not do REQ_KICK. */
        pthread_reap_children();

        if (__pthread_main_thread->p_nextlive == __pthread_main_thread) {
          restart(__pthread_main_thread);
          /* The main thread will now call exit() which will trigger an
             __on_exit handler, which in turn will send REQ_PROCESS_EXIT
             to the thread manager.  That, in case you are wondering, is
             how the manager terminates from its loop here. */
        }
        break;
      case REQ_POST:
        __new_sem_post(request.req_args.post);
        break;
      case REQ_DEBUG:
        /* Make gdb aware of the new thread; gdb will restart the
           new thread when it is ready to handle it. */
        if (__pthread_threads_debug && __pthread_sig_debug > 0)
          raise(__pthread_sig_debug);
        break;
      case REQ_KICK:
        /* This is just a prod to get the manager to reap some
           threads right away, avoiding a potential delay at shutdown. */
        break;
      }
    }
  }
}
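/* Illustration (not part of the original source): this is roughly how the
   other half of the protocol is assumed to look from a client thread, e.g.
   pthread_detach handing a REQ_FREE to the loop above.  The requester
   writes one fixed-size request record to the manager pipe, whose write
   end is __pthread_manager_request; the manager's __poll/__libc_read pair
   above picks it up:

     struct pthread_request request;
     request.req_thread = thread_self();
     request.req_kind = REQ_FREE;
     request.req_args.free.thread_id = thread_id;
     __libc_write(__pthread_manager_request,
                  (char *) &request, sizeof(request));

   Requests that need an answer (REQ_CREATE) additionally suspend() the
   requester, which the manager wakes with restart() once p_retcode has
   been filled in, as seen above. */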
int __pthread_manager_event(void *arg)
{
  /* If we have special thread_self processing, initialize it.  */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(&__pthread_manager_thread, 1);
#endif

  /* Get the lock the manager will free once all is correctly set up.  */
  __pthread_lock (THREAD_GETMEM((&__pthread_manager_thread), p_lock), NULL);
  /* Free it immediately.  */
  __pthread_unlock (THREAD_GETMEM((&__pthread_manager_thread), p_lock));

  return __pthread_manager(arg);
}
/* Process creation */

static int pthread_start_thread(void *arg)
{
  pthread_descr self = (pthread_descr) arg;
  struct pthread_request request;
  void * outcome;
  /* Initialize special thread_self processing, if any.  */
#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(self, self->p_nr);
#endif
  /* Make sure our pid field is initialized, just in case we get there
     before our father has initialized it. */
  THREAD_SETMEM(self, p_pid, __getpid());
  /* Initial signal mask is that of the creating thread. (Otherwise,
     we'd just inherit the mask of the thread manager.) */
  sigprocmask(SIG_SETMASK, &self->p_start_args.mask, NULL);
  /* Set the scheduling policy and priority for the new thread, if needed */
  if (THREAD_GETMEM(self, p_start_args.schedpolicy) >= 0)
    /* Explicit scheduling attributes were provided: apply them */
    __sched_setscheduler(THREAD_GETMEM(self, p_pid),
                         THREAD_GETMEM(self, p_start_args.schedpolicy),
                         &self->p_start_args.schedparam);
  else if (__pthread_manager_thread.p_priority > 0)
    /* Default scheduling required, but thread manager runs in realtime
       scheduling: switch new thread to SCHED_OTHER policy */
    {
      struct sched_param default_params;
      default_params.sched_priority = 0;
      __sched_setscheduler(THREAD_GETMEM(self, p_pid),
                           SCHED_OTHER, &default_params);
    }
  /* Make gdb aware of new thread */
  if (__pthread_threads_debug && __pthread_sig_debug > 0) {
    request.req_thread = self;
    request.req_kind = REQ_DEBUG;
    __libc_write(__pthread_manager_request,
                 (char *) &request, sizeof(request));
    suspend(self);
  }
  /* Run the thread code */
  outcome = self->p_start_args.start_routine(THREAD_GETMEM(self,
                                                           p_start_args.arg));
  /* Exit with the given return value */
  pthread_exit(outcome);
  return 0;
}
static int pthread_start_thread_event(void *arg)
{
  pthread_descr self = (pthread_descr) arg;

#ifdef INIT_THREAD_SELF
  INIT_THREAD_SELF(self, self->p_nr);
#endif
  /* Make sure our pid field is initialized, just in case we get there
     before our father has initialized it. */
  THREAD_SETMEM(self, p_pid, __getpid());
  /* Get the lock the manager will free once all is correctly set up.  */
  __pthread_lock (THREAD_GETMEM(self, p_lock), NULL);
  /* Free it immediately.  */
  __pthread_unlock (THREAD_GETMEM(self, p_lock));

  /* Continue with the real function.  */
  return pthread_start_thread (arg);
}
static int pthread_allocate_stack(const pthread_attr_t *attr,
                                  pthread_descr default_new_thread,
                                  int pagesize,
                                  pthread_descr * out_new_thread,
                                  char ** out_new_thread_bottom,
                                  char ** out_guardaddr,
                                  size_t * out_guardsize)
{
  pthread_descr new_thread;
  char * new_thread_bottom;
  char * guardaddr;
  size_t stacksize, guardsize;

  if (attr != NULL && attr->__stackaddr_set)
    {
      /* The user provided a stack.  For now we interpret the supplied
         address as 1 + the highest addr. in the stack segment.  If a
         separate register stack is needed, we place it at the low end
         of the segment, relying on the associated stacksize to
         determine the low end of the segment.  This differs from many
         (but not all) other pthreads implementations.  The intent is
         that on machines with a single stack growing toward higher
         addresses, stackaddr would be the lowest address in the stack
         segment, so that it is consistently close to the initial sp
         value. */
      new_thread =
        (pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
      new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
      guardaddr = new_thread_bottom;
      guardsize = 0;
#ifndef THREAD_SELF
      __pthread_nonstandard_stacks = 1;
#endif
      /* Clear the thread data structure.  */
      memset (new_thread, '\0', sizeof (*new_thread));
    }
  else
    {
#ifdef NEED_SEPARATE_REGISTER_STACK
      size_t granularity = 2 * pagesize;
      /* Try to make stacksize/2 a multiple of pagesize */
#else
      size_t granularity = pagesize;
#endif
      void *map_addr;

      /* Allocate space for stack and thread descriptor at default address */
#ifdef NEED_SEPARATE_REGISTER_STACK
      void *res_addr;

      if (attr != NULL)
        {
          guardsize = page_roundup (attr->__guardsize, granularity);
          stacksize = STACK_SIZE - guardsize;
          stacksize = MIN (stacksize,
                           page_roundup (attr->__stacksize, granularity));
        }
      else
        {
          guardsize = granularity;
          stacksize = STACK_SIZE - granularity;
        }

      new_thread = default_new_thread;
      new_thread_bottom = (char *) (new_thread + 1) - stacksize - guardsize;
      /* Includes guard area, unlike the normal case.  Use the bottom
         end of the segment as backing store for the register stack.
         Needed on IA64.  In this case, we also map the entire stack at
         once.  According to David Mosberger, that's cheaper.  It also
         avoids the risk of intermittent failures due to other mappings
         in the same region.  The cost is that we might be able to map
         slightly fewer stacks.  */

      /* XXX Fix for floating stacks with variable sizes.  */

      /* First the main stack: */
      map_addr = (caddr_t)((char *)(new_thread + 1) - stacksize / 2);
      res_addr = mmap(map_addr, stacksize / 2,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (res_addr != map_addr)
        {
          /* Bad luck, this segment is already mapped. */
          if (res_addr != MAP_FAILED)
            munmap(res_addr, stacksize / 2);
          return -1;
        }
      /* Then the register stack: */
      map_addr = (caddr_t)new_thread_bottom;
      res_addr = mmap(map_addr, stacksize/2,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (res_addr != map_addr)
        {
          if (res_addr != MAP_FAILED)
            munmap(res_addr, stacksize / 2);
          munmap((caddr_t)((char *)(new_thread + 1) - stacksize/2),
                 stacksize/2);
          return -1;
        }

      guardaddr = new_thread_bottom + stacksize/2;
      /* We leave the guard area in the middle unmapped.  */
#else  /* !NEED_SEPARATE_REGISTER_STACK */
# if FLOATING_STACKS
      if (attr != NULL)
        {
          guardsize = page_roundup (attr->__guardsize, granularity);
          stacksize = __pthread_max_stacksize - guardsize;
          stacksize = MIN (stacksize,
                           page_roundup (attr->__stacksize, granularity));
        }
      else
        {
          guardsize = granularity;
          stacksize = __pthread_max_stacksize - guardsize;
        }

      map_addr = mmap(NULL, stacksize + guardsize,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (map_addr == MAP_FAILED)
        /* No more memory available.  */
        return -1;

      guardaddr = map_addr;
      if (guardsize > 0)
        mprotect (guardaddr, guardsize, PROT_NONE);

      new_thread_bottom = (char *) map_addr + guardsize;
      new_thread = ((pthread_descr) (new_thread_bottom + stacksize)) - 1;
# else /* !FLOATING_STACKS */
      void *res_addr;

      if (attr != NULL)
        {
          guardsize = page_roundup (attr->__guardsize, granularity);
          stacksize = STACK_SIZE - guardsize;
          stacksize = MIN (stacksize,
                           page_roundup (attr->__stacksize, granularity));
        }
      else
        {
          guardsize = granularity;
          stacksize = STACK_SIZE - granularity;
        }

      new_thread = default_new_thread;
      new_thread_bottom = (char *) (new_thread + 1) - stacksize;
      map_addr = new_thread_bottom - guardsize;
      res_addr = mmap(map_addr, stacksize + guardsize,
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (res_addr != map_addr)
        {
          /* Bad luck, this segment is already mapped. */
          if (res_addr != MAP_FAILED)
            munmap (res_addr, stacksize + guardsize);
          return -1;
        }

      /* We managed to get a stack.  Protect the guard area pages if
         necessary.  */
      guardaddr = map_addr;
      if (guardsize > 0)
        mprotect (guardaddr, guardsize, PROT_NONE);
# endif
#endif /* !NEED_SEPARATE_REGISTER_STACK */
    }
  *out_new_thread = new_thread;
  *out_new_thread_bottom = new_thread_bottom;
  *out_guardaddr = guardaddr;
  *out_guardsize = guardsize;
  return 0;
}
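/* For reference (an assumption about internals.h, not code from this file):
   page_roundup() above is taken to round its first argument up to the next
   multiple of the power-of-two granularity, along the lines of:

     #define page_roundup(v, p) \
       ((((size_t) (v)) + (p) - 1) & ~((size_t) ((p) - 1)))

   so a requested guard size of 1 byte with a 4096-byte granularity becomes
   one full page. */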
static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
                                 void * (*start_routine)(void *), void *arg,
                                 sigset_t * mask, int father_pid,
                                 int report_events,
                                 td_thr_events_t *event_maskp)
{
  size_t sseg;
  int pid;
  pthread_descr new_thread;
  char * new_thread_bottom;
  pthread_t new_thread_id;
  char *guardaddr = NULL;
  size_t guardsize = 0;
  int pagesize = __getpagesize();

  /* First check whether we have to change the policy and if yes, whether
     we can do this.  Normally this should be done by examining the
     return value of the __sched_setscheduler call in pthread_start_thread
     but this is hard to implement.  FIXME  */
  if (attr != NULL && attr->__schedpolicy != SCHED_OTHER && geteuid () != 0)
    return EPERM;
  /* Find a free segment for the thread, and allocate a stack if needed */
  for (sseg = 2; ; sseg++)
    {
      if (sseg >= PTHREAD_THREADS_MAX)
        return EAGAIN;
      if (__pthread_handles[sseg].h_descr != NULL)
        continue;
      if (pthread_allocate_stack(attr, thread_segment(sseg),
                                 pagesize,
                                 &new_thread, &new_thread_bottom,
                                 &guardaddr, &guardsize) == 0)
        break;
    }
  __pthread_handles_num++;
  /* Allocate new thread identifier */
  pthread_threads_counter += PTHREAD_THREADS_MAX;
  new_thread_id = sseg + pthread_threads_counter;
  /* Initialize the thread descriptor.  Elements which have to be
     initialized to zero already have this value. */
  new_thread->p_tid = new_thread_id;
  new_thread->p_lock = &(__pthread_handles[sseg].h_lock);
  new_thread->p_cancelstate = PTHREAD_CANCEL_ENABLE;
  new_thread->p_canceltype = PTHREAD_CANCEL_DEFERRED;
  new_thread->p_errnop = &new_thread->p_errno;
  new_thread->p_h_errnop = &new_thread->p_h_errno;
  new_thread->p_resp = &new_thread->p_res;
  new_thread->p_guardaddr = guardaddr;
  new_thread->p_guardsize = guardsize;
  new_thread->p_header.data.self = new_thread;
  new_thread->p_nr = sseg;
  new_thread->p_inheritsched = attr ? attr->__inheritsched : 0;
  /* Initialize the thread handle */
  __pthread_init_lock(&__pthread_handles[sseg].h_lock);
  __pthread_handles[sseg].h_descr = new_thread;
  __pthread_handles[sseg].h_bottom = new_thread_bottom;
  /* Determine scheduling parameters for the thread */
  new_thread->p_start_args.schedpolicy = -1;
  if (attr != NULL) {
    new_thread->p_detached = attr->__detachstate;
    new_thread->p_userstack = attr->__stackaddr_set;

    switch(attr->__inheritsched) {
    case PTHREAD_EXPLICIT_SCHED:
      new_thread->p_start_args.schedpolicy = attr->__schedpolicy;
      memcpy (&new_thread->p_start_args.schedparam, &attr->__schedparam,
              sizeof (struct sched_param));
      break;
    case PTHREAD_INHERIT_SCHED:
      new_thread->p_start_args.schedpolicy = __sched_getscheduler(father_pid);
      __sched_getparam(father_pid, &new_thread->p_start_args.schedparam);
      break;
    }
    new_thread->p_priority =
      new_thread->p_start_args.schedparam.sched_priority;
  }
  /* Finish setting up arguments to pthread_start_thread */
  new_thread->p_start_args.start_routine = start_routine;
  new_thread->p_start_args.arg = arg;
  new_thread->p_start_args.mask = *mask;
  /* Make the new thread ID available already now.  If any of the later
     functions fail we return an error value and the caller must not use
     the stored thread ID.  */
  *thread = new_thread_id;
  /* Raise priority of thread manager if needed */
  __pthread_manager_adjust_prio(new_thread->p_priority);
  /* Do the cloning.  We have to use two different functions depending
     on whether we are debugging or not.  */
  pid = 0;      /* Note that the thread never can have PID zero.  */
  if (report_events)
    {
      /* See whether the TD_CREATE event bit is set in any of the
         masks.  */
      int idx = __td_eventword (TD_CREATE);
      uint32_t mask = __td_eventmask (TD_CREATE);

      if ((mask & (__pthread_threads_events.event_bits[idx]
                   | event_maskp->event_bits[idx])) != 0)
        {
          /* Lock the mutex the child will use now so that it will stop.  */
          __pthread_lock(new_thread->p_lock, NULL);

          /* We have to report this event.  */
#ifdef NEED_SEPARATE_REGISTER_STACK
          /* Perhaps this version should be used on all platforms.  But
             this requires that __clone2 be uniformly supported
             everywhere.

             And there is some argument for changing the __clone2
             interface to pass sp and bsp instead, making it more IA64
             specific, but allowing stacks to grow outward from each
             other, to get less paging and fewer mmaps.  */
          pid = __clone2(pthread_start_thread_event,
                         (void **)new_thread_bottom,
                         (char *)new_thread - new_thread_bottom,
                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                         __pthread_sig_cancel, new_thread);
#else
          pid = __clone(pthread_start_thread_event, (void **) new_thread,
                        CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                        __pthread_sig_cancel, new_thread);
#endif
          if (pid != -1)
            {
              /* Now fill in the information about the new thread in
                 the newly created thread's data structure.  We cannot let
                 the new thread do this since we don't know whether it was
                 already scheduled when we send the event.  */
              new_thread->p_eventbuf.eventdata = new_thread;
              new_thread->p_eventbuf.eventnum = TD_CREATE;
              __pthread_last_event = new_thread;

              /* We have to set the PID here since the callback function
                 in the debug library will need it and we cannot guarantee
                 the child got scheduled before the debugger.  */
              new_thread->p_pid = pid;

              /* Now call the function which signals the event.  */
              __linuxthreads_create_event ();

              /* Now restart the thread.  */
              __pthread_unlock(new_thread->p_lock);
            }
        }
    }
  if (pid == 0)
    {
#ifdef NEED_SEPARATE_REGISTER_STACK
      pid = __clone2(pthread_start_thread,
                     (void **)new_thread_bottom,
                     (char *)new_thread - new_thread_bottom,
                     CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                     __pthread_sig_cancel, new_thread);
#else
      pid = __clone(pthread_start_thread, (void **) new_thread,
                    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                    __pthread_sig_cancel, new_thread);
#endif /* !NEED_SEPARATE_REGISTER_STACK */
    }
  /* Check if cloning succeeded */
  if (pid == -1) {
    /* Free the stack if we allocated it */
    if (attr == NULL || !attr->__stackaddr_set)
      {
#ifdef NEED_SEPARATE_REGISTER_STACK
        size_t stacksize = ((char *)(new_thread->p_guardaddr)
                            - new_thread_bottom);
        munmap((caddr_t)new_thread_bottom,
               2 * stacksize + new_thread->p_guardsize);
#else
        size_t stacksize = (char *)(new_thread+1) - new_thread_bottom;
        munmap(new_thread_bottom - guardsize, guardsize + stacksize);
#endif
      }
    __pthread_handles[sseg].h_descr = NULL;
    __pthread_handles[sseg].h_bottom = NULL;
    __pthread_handles_num--;
    return errno;
  }
  /* Insert new thread in doubly linked list of active threads */
  new_thread->p_prevlive = __pthread_main_thread;
  new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
  __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
  __pthread_main_thread->p_nextlive = new_thread;
  /* Set pid field of the new thread, in case we get there before the
     child starts. */
  new_thread->p_pid = pid;
  return 0;
}
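/* Illustration (not part of the original source): stripped of bookkeeping,
   the heart of the creation path above is one clone() call that shares the
   address space, filesystem state, file descriptors and signal handlers
   with the caller, and points the child at the descriptor on top of its
   new stack:

     pid = __clone(pthread_start_thread, (void **) new_thread,
                   CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                   __pthread_sig_cancel, new_thread);

   The low byte of the flags word is the signal the kernel delivers to the
   parent when the child exits; passing __pthread_sig_cancel there is what
   makes __pthread_manager_sighandler and pthread_reap_children (below)
   run when a thread terminates. */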
/* Try to free the resources of a thread when requested by pthread_join
   or pthread_detach on a terminated thread. */

static void pthread_free(pthread_descr th)
{
  pthread_handle handle;
  pthread_readlock_info *iter, *next;

  ASSERT(th->p_exited);
  /* Make the handle invalid */
  handle = thread_handle(th->p_tid);
  __pthread_lock(&handle->h_lock, NULL);
  handle->h_descr = NULL;
  handle->h_bottom = (char *)(-1L);
  __pthread_unlock(&handle->h_lock);
#ifdef FREE_THREAD
  FREE_THREAD(th, th->p_nr);
#endif
  /* One fewer threads in __pthread_handles */
  __pthread_handles_num--;

  /* Destroy read lock list, and list of free read lock structures.
     If the former is not empty, it means the thread exited while
     holding read locks! */

  for (iter = th->p_readlock_list; iter != NULL; iter = next)
    {
      next = iter->pr_next;
      free(iter);
    }

  for (iter = th->p_readlock_free; iter != NULL; iter = next)
    {
      next = iter->pr_next;
      free(iter);
    }

  /* If initial thread, nothing to free */
  if (!th->p_userstack)
    {
      size_t guardsize = th->p_guardsize;
      /* Free the stack and thread descriptor area */
      char *guardaddr = th->p_guardaddr;
      /* Guardaddr is always set, even if guardsize is 0.  This allows
         us to compute everything else.  */
      size_t stacksize = (char *)(th+1) - guardaddr - guardsize;
#ifdef NEED_SEPARATE_REGISTER_STACK
      /* Take account of the register stack, which is below guardaddr.  */
      guardaddr -= stacksize;
      stacksize *= 2;
#endif
      /* Unmap the stack.  */
      munmap(guardaddr, stacksize + guardsize);
    }
}
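/* Worked example (illustrative, with assumed sizes, not from the original
   source): with a 2MB segment, a one-page (4096-byte) guard at the bottom
   and the descriptor at the top, pthread_free computes

     stacksize = (char *)(th + 1) - guardaddr - guardsize
               = 2MB - 4096

   so munmap(guardaddr, stacksize + guardsize) releases the whole 2MB
   segment, guard page included, in a single call. */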
/* Handle threads that have exited */

static void pthread_exited(pid_t pid)
{
  pthread_descr th;
  int detached;
  /* Find thread with that pid */
  for (th = __pthread_main_thread->p_nextlive;
       th != __pthread_main_thread;
       th = th->p_nextlive) {
    if (th->p_pid == pid) {
      /* Remove thread from list of active threads */
      th->p_nextlive->p_prevlive = th->p_prevlive;
      th->p_prevlive->p_nextlive = th->p_nextlive;
      /* Mark thread as exited, and if detached, free its resources */
      __pthread_lock(th->p_lock, NULL);
      th->p_exited = 1;
      /* If we have to signal this event do it now.  */
      if (th->p_report_events)
        {
          /* See whether TD_REAP is in any of the masks.  */
          int idx = __td_eventword (TD_REAP);
          uint32_t mask = __td_eventmask (TD_REAP);

          if ((mask & (__pthread_threads_events.event_bits[idx]
                       | th->p_eventbuf.eventmask.event_bits[idx])) != 0)
            {
              /* Yep, we have to signal the reapage.  */
              th->p_eventbuf.eventnum = TD_REAP;
              th->p_eventbuf.eventdata = th;
              __pthread_last_event = th;

              /* Now call the function which signals the event.  */
              __linuxthreads_reap_event();
            }
        }
      detached = th->p_detached;
      __pthread_unlock(th->p_lock);
      if (detached)
        pthread_free(th);
      break;
    }
  }
  /* If all threads have exited and the main thread is pending on a
     pthread_exit, wake up the main thread and terminate ourselves. */
  if (main_thread_exiting &&
      __pthread_main_thread->p_nextlive == __pthread_main_thread) {
    restart(__pthread_main_thread);
    /* Same logic as REQ_MAIN_THREAD_EXIT. */
  }
}
static void pthread_reap_children(void)
{
  pid_t pid;
  int status;

  while ((pid = __libc_waitpid(-1, &status, WNOHANG | __WCLONE)) > 0) {
    pthread_exited(pid);
    if (WIFSIGNALED(status)) {
      /* If a thread died due to a signal, send the same signal to
         all other threads, including the main thread. */
      pthread_kill_all_threads(WTERMSIG(status), 1);
      _exit(0);
    }
  }
}
/* Try to free the resources of a thread when requested by pthread_join
   or pthread_detach on a terminated thread. */

static void pthread_handle_free(pthread_t th_id)
{
  pthread_handle handle = thread_handle(th_id);
  pthread_descr th;

  __pthread_lock(&handle->h_lock, NULL);
  if (nonexisting_handle(handle, th_id)) {
    /* pthread_reap_children has deallocated the thread already,
       nothing needs to be done */
    __pthread_unlock(&handle->h_lock);
    return;
  }
  th = handle->h_descr;
  if (th->p_exited) {
    __pthread_unlock(&handle->h_lock);
    pthread_free(th);
  } else {
    /* The Unix process of the thread is still running.
       Mark the thread as detached so that the thread manager will
       deallocate its resources when the Unix process exits. */
    th->p_detached = 1;
    __pthread_unlock(&handle->h_lock);
  }
}
/* Send a signal to all running threads */

static void pthread_kill_all_threads(int sig, int main_thread_also)
{
  pthread_descr th;
  for (th = __pthread_main_thread->p_nextlive;
       th != __pthread_main_thread;
       th = th->p_nextlive) {
    kill(th->p_pid, sig);
  }
  if (main_thread_also) {
    kill(__pthread_main_thread->p_pid, sig);
  }
}
/* Process-wide exit() */

static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
{
  pthread_descr th;
  __pthread_exit_requested = 1;
  __pthread_exit_code = exitcode;
  /* Send the CANCEL signal to all running threads, including the main
     thread, but excluding the thread from which the exit request originated
     (that thread must complete the exit, e.g. calling atexit functions
     and flushing stdio buffers). */
  for (th = issuing_thread->p_nextlive;
       th != issuing_thread;
       th = th->p_nextlive) {
    kill(th->p_pid, __pthread_sig_cancel);
  }
  /* Now, wait for all these threads, so that they don't become zombies
     and their times are properly added to the thread manager's times. */
  for (th = issuing_thread->p_nextlive;
       th != issuing_thread;
       th = th->p_nextlive) {
    waitpid(th->p_pid, NULL, __WCLONE);
  }
  restart(issuing_thread);
  _exit(0);
}
/* Handler for __pthread_sig_cancel in thread manager thread */

void __pthread_manager_sighandler(int sig)
{
  int kick_manager = terminated_children == 0 && main_thread_exiting;
  terminated_children = 1;

  /* If the main thread is terminating, kick the thread manager loop
     each time some threads terminate. This eliminates a two second
     shutdown delay caused by the thread manager sleeping in the
     call to __poll(). Instead, the thread manager is kicked into
     action, reaps the outstanding threads and resumes the main thread
     so that it can complete the shutdown. */

  if (kick_manager) {
    struct pthread_request request;
    request.req_thread = 0;
    request.req_kind = REQ_KICK;
    __libc_write(__pthread_manager_request,
                 (char *) &request, sizeof(request));
  }
}
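/* For context (an assumption about pthread.c, not code from this file):
   the handler above is presumed to be installed during library
   initialization with something like

     struct sigaction sa;
     sa.sa_handler = __pthread_manager_sighandler;
     sigemptyset(&sa.sa_mask);
     sa.sa_flags = 0;
     sigaction(__pthread_sig_cancel, &sa, NULL);

   so that each terminating child (cloned with __pthread_sig_cancel as its
   exit signal) nudges the manager out of its 2-second __poll timeout. */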
/* Adjust priority of thread manager so that it always runs at a priority
   higher than all threads */

void __pthread_manager_adjust_prio(int thread_prio)
{
  struct sched_param param;

  if (thread_prio <= __pthread_manager_thread.p_priority) return;
  param.sched_priority =
    thread_prio < __sched_get_priority_max(SCHED_FIFO)
    ? thread_prio + 1 : thread_prio;
  __sched_setscheduler(__pthread_manager_thread.p_pid, SCHED_FIFO, &param);
  __pthread_manager_thread.p_priority = thread_prio;
}
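/* Example (assumed numbers, illustrative only): if a thread runs SCHED_FIFO
   at priority 10 and __sched_get_priority_max(SCHED_FIFO) is 99, the
   manager is moved to SCHED_FIFO priority 11, so it always preempts the
   threads it serves; only when a thread already runs at the maximum can
   the manager merely match it. */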