nptl/allocatestack.c

   1 /* Copyright (C) 2002-2015 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <assert.h>
  20 #include <errno.h>
  21 #include <signal.h>
  22 #include <stdint.h>
  23 #include <string.h>
  24 #include <unistd.h>
  25 #include <sys/mman.h>
  26 #include <sys/param.h>
  27 #include <dl-sysdep.h>
  28 #include <dl-tls.h>
  29 #include <tls.h>
  30 #include <list.h>
  31 #include <lowlevellock.h>
  32 #include <kernel-features.h>
  33 #include <stack-aliasing.h>
  34
  35
  36 #ifndef NEED_SEPARATE_REGISTER_STACK
  37
  38 /* Most architectures have exactly one stack pointer.  Some have more.
        With a single stack only its address has to be carried around.  */
  39 # define STACK_VARIABLES void *stackaddr = NULL
  40
  41 /* How to pass the values to the 'create_thread' function.  */
  42 # define STACK_VARIABLES_ARGS stackaddr
  43
  44 /* How to declare a function which gets these parameters.  */
  45 # define STACK_VARIABLES_PARMS void *stackaddr
  46
  47 /* How to declare the trailing output parameters of allocate_stack.  */
  48 # define ALLOCATE_STACK_PARMS void **stack
  49
  50 /* This is how the function is called.  We do it this way to allow
  51    other variants of the function to have more parameters.  The
        expansion refers to the variable declared by STACK_VARIABLES.  */
  52 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
  53
  54 #else
  55
  56 /* We need two stacks.  The kernel will place them but we have to tell
  57    the kernel about the size of the reserved address space.  */
  58 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
  59
  60 /* How to pass the values to the 'create_thread' function.  */
  61 # define STACK_VARIABLES_ARGS stackaddr, stacksize
  62
  63 /* How to declare a function which gets these parameters.  */
  64 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
  65
  66 /* How to declare the trailing output parameters of allocate_stack.  */
  67 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
  68
  69 /* This is how the function is called.  We do it this way to allow
  70    other variants of the function to have more parameters.  The
        expansion refers to the variables declared by STACK_VARIABLES.  */
  71 # define ALLOCATE_STACK(attr, pd) \
  72   allocate_stack (attr, pd, &stackaddr, &stacksize)
  73
  74 #endif
  75
  76
  77 /* Default alignment of the stack.  Used unless STACK_ALIGN has already
        been defined.  */
  78 #ifndef STACK_ALIGN
  79 # define STACK_ALIGN __alignof__ (long double)
  80 #endif
  81
  82 /* Default value for the minimal stack size that must remain after
  83    allocating the thread descriptor and guard.  Used unless
        MINIMAL_REST_STACK has already been defined.  */
  84 #ifndef MINIMAL_REST_STACK
  85 # define MINIMAL_REST_STACK     4096
  86 #endif
  87
  88
  89 /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
  90    a stack.  Use it when possible.  When the flag is not defined it is
        given the value 0 so it can still be OR-ed into the mmap flags.  */
  91 #ifndef MAP_STACK
  92 # define MAP_STACK 0
  93 #endif
  94
  95 /* This yields the pointer that TLS support code calls the thread pointer.
        With TLS_TCB_AT_TP it is the descriptor itself; with TLS_DTV_AT_TP it
        lies TLS_PRE_TCB_SIZE bytes past the descriptor.  */
  96 #if TLS_TCB_AT_TP
  97 # define TLS_TPADJ(pd) (pd)
  98 #elif TLS_DTV_AT_TP
  99 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
 100 #endif
 101
 102 /* Cache handling for not-yet free stacks.  */
 103
 104 /* Maximum size of the cache in bytes.  It is compared against
        stack_cache_actsize, the sum of the block sizes of the cached
        stacks.  */
 105 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
 106 static size_t stack_cache_actsize;
 107
 108 /* Lock protecting the cache accounting above and the stack lists.  */
 109 static int stack_cache_lock = LLL_LOCK_INITIALIZER;
 110
 111 /* List of cached stacks which are queued for reuse.  */
 112 static LIST_HEAD (stack_cache);
 113
 114 /* List of the stacks in use.  */
 115 static LIST_HEAD (stack_used);
 116
 117 /* We need to record what list operations we are going to do so that,
 118    in case of an asynchronous interruption due to a fork() call, we
 119    can correct for the work.  The value is the address of the list
        element being handled; the low bit is set for an add and clear
        for a delete.  Zero means no operation is in progress.  */
 120 static uintptr_t in_flight_stack;
 121
 122 /* List of the threads with user provided stacks in use.  No need to
 123    initialize this, since it's done in __pthread_initialize_minimal.  */
 124 list_t __stack_user __attribute__ ((nocommon));
 125 hidden_data_def (__stack_user)
 126
 127 #if COLORING_INCREMENT != 0
 128 /* Number of threads created.  Used to derive the coloring offset of
        newly mapped stacks.  */
 129 static unsigned int nptl_ncreated;
 130 #endif
 131
 132
 133 /* Check whether the stack is still used or not.  A descriptor whose
        tid is zero or negative counts as free.  */
 134 #define FREE_P(descr) ((descr)->tid <= 0)
 135
 136 /* Unlink ELEM from the list it is on.  The element address is stored
        in in_flight_stack (low bit clear, which marks a delete) before the
        list is changed and reset to zero afterwards, so that
        __reclaim_stacks can replay an operation interrupted by fork().  */
 137 static void
 138 stack_list_del (list_t *elem)
 139 {
 140   in_flight_stack = (uintptr_t) elem;
 141   /* Order the record above before the list update.  */
 142   atomic_write_barrier ();
 143
 144   list_del (elem);
 145   /* Order the list update before clearing the record.  */
 146   atomic_write_barrier ();
 147
 148   in_flight_stack = 0;
 149 }
 150
 151 /* Insert ELEM into LIST.  The element address with the low bit set
        (which marks an add) is stored in in_flight_stack before the list
        is changed and reset to zero afterwards, so that __reclaim_stacks
        can complete an operation interrupted by fork().  */
 152 static void
 153 stack_list_add (list_t *elem, list_t *list)
 154 {
 155   in_flight_stack = (uintptr_t) elem | 1;
 156   /* Order the record above before the list update.  */
 157   atomic_write_barrier ();
 158
 159   list_add (elem, list);
 160   /* Order the list update before clearing the record.  */
 161   atomic_write_barrier ();
 162
 163   in_flight_stack = 0;
 164 }
 165
 166
 167 /* We create a doubly linked list of all cache entries.  Doubly linked
 168    because this allows removing entries from the end.  */
 169
 170
 171 /* Get a stack frame from the cache.  We have to match by size since
 172    some blocks might be too small or far too large.  */
 173 static struct pthread *
 174 get_cached_stack (size_t *sizep, void **memp)
 175 {
 176   size_t size = *sizep;
 177   struct pthread *result = NULL;
 178   list_t *entry;
 179
 180   lll_lock (stack_cache_lock, LLL_PRIVATE);
 181
 182   /* Search the cache for a matching entry.  We search for the
 183      smallest stack which has at least the required size.  Note that
 184      in normal situations the size of all allocated stacks is the
 185      same.  As the very least there are only a few different sizes.
 186      Therefore this loop will exit early most of the time with an
 187      exact match.  */
 188   list_for_each (entry, &stack_cache)
 189     {
 190       struct pthread *curr;
 191
 192       curr = list_entry (entry, struct pthread, list);
 193       if (FREE_P (curr) && curr->stackblock_size >= size)
 194         {
 195           if (curr->stackblock_size == size)
 196             {
 197               result = curr;
 198               break;
 199             }
 200
 201           if (result == NULL
 202               || result->stackblock_size > curr->stackblock_size)
 203             result = curr;
 204         }
 205     }
 206
 207   if (__builtin_expect (result == NULL, 0)
 208       /* Make sure the size difference is not too excessive.  In that
 209          case we do not use the block.  */
 210       || __builtin_expect (result->stackblock_size > 4 * size, 0))
 211     {
 212       /* Release the lock.  */
 213       lll_unlock (stack_cache_lock, LLL_PRIVATE);
 214
 215       return NULL;
 216     }
 217
 218   /* Don't allow setxid until cloned.  */
 219   result->setxid_futex = -1;
 220
 221   /* Dequeue the entry.  */
 222   stack_list_del (&result->list);
 223
 224   /* And add to the list of stacks in use.  */
 225   stack_list_add (&result->list, &stack_used);
 226
 227   /* And decrease the cache size.  */
 228   stack_cache_actsize -= result->stackblock_size;
 229
 230   /* Release the lock early.  */
 231   lll_unlock (stack_cache_lock, LLL_PRIVATE);
 232
 233   /* Report size and location of the stack to the caller.  */
 234   *sizep = result->stackblock_size;
 235   *memp = result->stackblock;
 236
 237   /* Cancellation handling is back to the default.  */
 238   result->cancelhandling = 0;
 239   result->cleanup = NULL;
 240
 241   /* No pending event.  */
 242   result->nextevent = NULL;
 243
 244   /* Clear the DTV.  */
 245   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
 246   for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
 247     if (! dtv[1 + cnt].pointer.is_static
 248         && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
 249       free (dtv[1 + cnt].pointer.val);
 250   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
 251
 252   /* Re-initialize the TLS.  */
 253   _dl_allocate_tls_init (TLS_TPADJ (result));
 254
 255   return result;
 256 }
 257
 258
 259 /* Free stacks until cache size is lower than LIMIT.  */
 260 void
 261 __free_stacks (size_t limit)
 262 {
 263   /* We reduce the size of the cache.  Remove the last entries until
 264      the size is below the limit.  */
 265   list_t *entry;
 266   list_t *prev;
 267
 268   /* Search from the end of the list.  */
 269   list_for_each_prev_safe (entry, prev, &stack_cache)
 270     {
 271       struct pthread *curr;
 272
 273       curr = list_entry (entry, struct pthread, list);
 274       if (FREE_P (curr))
 275         {
 276           /* Unlink the block.  */
 277           stack_list_del (entry);
 278
 279           /* Account for the freed memory.  */
 280           stack_cache_actsize -= curr->stackblock_size;
 281
 282           /* Free the memory associated with the ELF TLS.  */
 283           _dl_deallocate_tls (TLS_TPADJ (curr), false);
 284
 285           /* Remove this block.  This should never fail.  If it does
 286              something is really wrong.  */
 287           if (munmap (curr->stackblock, curr->stackblock_size) != 0)
 288             abort ();
 289
 290           /* Maybe we have freed enough.  */
 291           if (stack_cache_actsize <= limit)
 292             break;
 293         }
 294     }
 295 }
 296
 297
 298 /* Add a stack frame which is not used anymore to the stack.  Must be
 299    called with the cache lock held.  */
 300 static inline void
 301 __attribute ((always_inline))
 302 queue_stack (struct pthread *stack)
 303 {
 304   /* We unconditionally add the stack to the list.  The memory may
 305      still be in use but it will not be reused until the kernel marks
 306      the stack as not used anymore.  */
 307   stack_list_add (&stack->list, &stack_cache);
 308
 309   stack_cache_actsize += stack->stackblock_size;
 310   if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
 311     __free_stacks (stack_cache_maxsize);
 312 }
 313
 314
 315 static int
 316 internal_function
 317 change_stack_perm (struct pthread *pd
 318 #ifdef NEED_SEPARATE_REGISTER_STACK
 319                    , size_t pagemask
 320 #endif
 321                    )
 322 {
 323 #ifdef NEED_SEPARATE_REGISTER_STACK
 324   void *stack = (pd->stackblock
 325                  + (((((pd->stackblock_size - pd->guardsize) / 2)
 326                       & pagemask) + pd->guardsize) & pagemask));
 327   size_t len = pd->stackblock + pd->stackblock_size - stack;
 328 #elif _STACK_GROWS_DOWN
 329   void *stack = pd->stackblock + pd->guardsize;
 330   size_t len = pd->stackblock_size - pd->guardsize;
 331 #elif _STACK_GROWS_UP
 332   void *stack = pd->stackblock;
 333   size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
 334 #else
 335 # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
 336 #endif
 337   if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
 338     return errno;
 339
 340   return 0;
 341 }
 342
 343
 344 /* Returns a usable stack for a new thread either by allocating a
 345    new stack or reusing a cached stack of sufficient size.
 346    ATTR must be non-NULL and point to a valid pthread_attr.
 347    PDP must be non-NULL.  */
 348 static int
 349 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 350                 ALLOCATE_STACK_PARMS)
 351 {
 352   struct pthread *pd;
 353   size_t size;
 354   size_t pagesize_m1 = __getpagesize () - 1;
 355   void *stacktop;
 356
 357   assert (powerof2 (pagesize_m1 + 1));
 358   assert (TCB_ALIGNMENT >= STACK_ALIGN);
 359
 360   /* Get the stack size from the attribute if it is set.  Otherwise we
 361      use the default we determined at start time.  */
 362   if (attr->stacksize != 0)
 363     size = attr->stacksize;
 364   else
 365     {
 366       lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
 367       size = __default_pthread_attr.stacksize;
 368       lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
 369     }
 370
 371   /* Get memory for the stack.  */
 372   if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
 373     {
 374       uintptr_t adj;
 375
 376       /* If the user also specified the size of the stack make sure it
 377          is large enough.  */
 378       if (attr->stacksize != 0
 379           && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
 380         return EINVAL;
 381
 382       /* Adjust stack size for alignment of the TLS block.  */
 383 #if TLS_TCB_AT_TP
 384       adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
 385             & __static_tls_align_m1;
 386       assert (size > adj + TLS_TCB_SIZE);
 387 #elif TLS_DTV_AT_TP
 388       adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
 389             & __static_tls_align_m1;
 390       assert (size > adj);
 391 #endif
 392
 393       /* The user provided some memory.  Let's hope it matches the
 394          size...  We do not allocate guard pages if the user provided
 395          the stack.  It is the user's responsibility to do this if it
 396          is wanted.  */
 397 #if TLS_TCB_AT_TP
 398       pd = (struct pthread *) ((uintptr_t) attr->stackaddr
 399                                - TLS_TCB_SIZE - adj);
 400 #elif TLS_DTV_AT_TP
 401       pd = (struct pthread *) (((uintptr_t) attr->stackaddr
 402                                 - __static_tls_size - adj)
 403                                - TLS_PRE_TCB_SIZE);
 404 #endif
 405
 406       /* The user provided stack memory needs to be cleared.  */
 407       memset (pd, '\0', sizeof (struct pthread));
 408
 409       /* The first TSD block is included in the TCB.  */
 410       pd->specific[0] = pd->specific_1stblock;
 411
 412       /* Remember the stack-related values.  */
 413       pd->stackblock = (char *) attr->stackaddr - size;
 414       pd->stackblock_size = size;
 415
 416       /* This is a user-provided stack.  It will not be queued in the
 417          stack cache nor will the memory (except the TLS memory) be freed.  */
 418       pd->user_stack = true;
 419
 420       /* This is at least the second thread.  */
 421       pd->header.multiple_threads = 1;
 422 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
 423       __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
 424 #endif
 425
 426 #ifndef __ASSUME_PRIVATE_FUTEX
 427       /* The thread must know when private futexes are supported.  */
 428       pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
 429                                                 header.private_futex);
 430 #endif
 431
 432 #ifdef NEED_DL_SYSINFO
 433       SETUP_THREAD_SYSINFO (pd);
 434 #endif
 435
 436       /* The process ID is also the same as that of the caller.  */
 437       pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
 438
 439       /* Don't allow setxid until cloned.  */
 440       pd->setxid_futex = -1;
 441
 442       /* Allocate the DTV for this thread.  */
 443       if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
 444         {
 445           /* Something went wrong.  */
 446           assert (errno == ENOMEM);
 447           return errno;
 448         }
 449
 450
 451       /* Prepare to modify global data.  */
 452       lll_lock (stack_cache_lock, LLL_PRIVATE);
 453
 454       /* And add to the list of stacks in use.  */
 455       list_add (&pd->list, &__stack_user);
 456
 457       lll_unlock (stack_cache_lock, LLL_PRIVATE);
 458     }
 459   else
 460     {
 461       /* Allocate some anonymous memory.  If possible use the cache.  */
 462       size_t guardsize;
 463       size_t reqsize;
 464       void *mem;
 465       const int prot = (PROT_READ | PROT_WRITE
 466                         | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
 467
 468 #if COLORING_INCREMENT != 0
 469       /* Add one more page for stack coloring.  Don't do it for stacks
 470          with 16 times pagesize or larger.  This might just cause
 471          unnecessary misalignment.  */
 472       if (size <= 16 * pagesize_m1)
 473         size += pagesize_m1 + 1;
 474 #endif
 475
 476       /* Adjust the stack size for alignment.  */
 477       size &= ~__static_tls_align_m1;
 478       assert (size != 0);
 479
 480       /* Make sure the size of the stack is enough for the guard and
 481          eventually the thread descriptor.  */
 482       guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
 483       if (__builtin_expect (size < ((guardsize + __static_tls_size
 484                                      + MINIMAL_REST_STACK + pagesize_m1)
 485                                     & ~pagesize_m1),
 486                             0))
 487         /* The stack is too small (or the guard too large).  */
 488         return EINVAL;
 489
 490       /* Try to get a stack from the cache.  */
 491       reqsize = size;
 492       pd = get_cached_stack (&size, &mem);
 493       if (pd == NULL)
 494         {
 495           /* To avoid aliasing effects on a larger scale than pages we
 496              adjust the allocated stack size if necessary.  This way
 497              allocations directly following each other will not have
 498              aliasing problems.  */
 499 #if MULTI_PAGE_ALIASING != 0
 500           if ((size % MULTI_PAGE_ALIASING) == 0)
 501             size += pagesize_m1 + 1;
 502 #endif
 503
 504           mem = mmap (NULL, size, prot,
 505                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
 506
 507           if (__glibc_unlikely (mem == MAP_FAILED))
 508             return errno;
 509
 510           /* SIZE is guaranteed to be greater than zero.
 511              So we can never get a null pointer back from mmap.  */
 512           assert (mem != NULL);
 513
 514 #if COLORING_INCREMENT != 0
 515           /* Atomically increment NCREATED.  */
 516           unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
 517
 518           /* We chose the offset for coloring by incrementing it for
 519              every new thread by a fixed amount.  The offset used
 520              module the page size.  Even if coloring would be better
 521              relative to higher alignment values it makes no sense to
 522              do it since the mmap() interface does not allow us to
 523              specify any alignment for the returned memory block.  */
 524           size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
 525
 526           /* Make sure the coloring offsets does not disturb the alignment
 527              of the TCB and static TLS block.  */
 528           if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
 529             coloring = (((coloring + __static_tls_align_m1)
 530                          & ~(__static_tls_align_m1))
 531                         & ~pagesize_m1);
 532 #else
 533           /* Unless specified we do not make any adjustments.  */
 534 # define coloring 0
 535 #endif
 536
 537           /* Place the thread descriptor at the end of the stack.  */
 538 #if TLS_TCB_AT_TP
 539           pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
 540 #elif TLS_DTV_AT_TP
 541           pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
 542                                     - __static_tls_size)
 543                                     & ~__static_tls_align_m1)
 544                                    - TLS_PRE_TCB_SIZE);
 545 #endif
 546
 547           /* Remember the stack-related values.  */
 548           pd->stackblock = mem;
 549           pd->stackblock_size = size;
 550
 551           /* We allocated the first block thread-specific data array.
 552              This address will not change for the lifetime of this
 553              descriptor.  */
 554           pd->specific[0] = pd->specific_1stblock;
 555
 556           /* This is at least the second thread.  */
 557           pd->header.multiple_threads = 1;
 558 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
 559           __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
 560 #endif
 561
 562 #ifndef __ASSUME_PRIVATE_FUTEX
 563           /* The thread must know when private futexes are supported.  */
 564           pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
 565                                                     header.private_futex);
 566 #endif
 567
 568 #ifdef NEED_DL_SYSINFO
 569           SETUP_THREAD_SYSINFO (pd);
 570 #endif
 571
 572           /* Don't allow setxid until cloned.  */
 573           pd->setxid_futex = -1;
 574
 575           /* The process ID is also the same as that of the caller.  */
 576           pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
 577
 578           /* Allocate the DTV for this thread.  */
 579           if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
 580             {
 581               /* Something went wrong.  */
 582               assert (errno == ENOMEM);
 583
 584               /* Free the stack memory we just allocated.  */
 585               (void) munmap (mem, size);
 586
 587               return errno;
 588             }
 589
 590
 591           /* Prepare to modify global data.  */
 592           lll_lock (stack_cache_lock, LLL_PRIVATE);
 593
 594           /* And add to the list of stacks in use.  */
 595           stack_list_add (&pd->list, &stack_used);
 596
 597           lll_unlock (stack_cache_lock, LLL_PRIVATE);
 598
 599
 600           /* There might have been a race.  Another thread might have
 601              caused the stacks to get exec permission while this new
 602              stack was prepared.  Detect if this was possible and
 603              change the permission if necessary.  */
 604           if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
 605                                 && (prot & PROT_EXEC) == 0, 0))
 606             {
 607               int err = change_stack_perm (pd
 608 #ifdef NEED_SEPARATE_REGISTER_STACK
 609                                            , ~pagesize_m1
 610 #endif
 611                                            );
 612               if (err != 0)
 613                 {
 614                   /* Free the stack memory we just allocated.  */
 615                   (void) munmap (mem, size);
 616
 617                   return err;
 618                 }
 619             }
 620
 621
 622           /* Note that all of the stack and the thread descriptor is
 623              zeroed.  This means we do not have to initialize fields
 624              with initial value zero.  This is specifically true for
 625              the 'tid' field which is always set back to zero once the
 626              stack is not used anymore and for the 'guardsize' field
 627              which will be read next.  */
 628         }
 629
 630       /* Create or resize the guard area if necessary.  */
 631       if (__glibc_unlikely (guardsize > pd->guardsize))
 632         {
 633 #ifdef NEED_SEPARATE_REGISTER_STACK
 634           char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
 635 #elif _STACK_GROWS_DOWN
 636           char *guard = mem;
 637 # elif _STACK_GROWS_UP
 638           char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
 639 #endif
 640           if (mprotect (guard, guardsize, PROT_NONE) != 0)
 641             {
 642             mprot_error:
 643               lll_lock (stack_cache_lock, LLL_PRIVATE);
 644
 645               /* Remove the thread from the list.  */
 646               stack_list_del (&pd->list);
 647
 648               lll_unlock (stack_cache_lock, LLL_PRIVATE);
 649
 650               /* Get rid of the TLS block we allocated.  */
 651               _dl_deallocate_tls (TLS_TPADJ (pd), false);
 652
 653               /* Free the stack memory regardless of whether the size
 654                  of the cache is over the limit or not.  If this piece
 655                  of memory caused problems we better do not use it
 656                  anymore.  Uh, and we ignore possible errors.  There
 657                  is nothing we could do.  */
 658               (void) munmap (mem, size);
 659
 660               return errno;
 661             }
 662
 663           pd->guardsize = guardsize;
 664         }
 665       else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
 666                                  0))
 667         {
 668           /* The old guard area is too large.  */
 669
 670 #ifdef NEED_SEPARATE_REGISTER_STACK
 671           char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
 672           char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
 673
 674           if (oldguard < guard
 675               && mprotect (oldguard, guard - oldguard, prot) != 0)
 676             goto mprot_error;
 677
 678           if (mprotect (guard + guardsize,
 679                         oldguard + pd->guardsize - guard - guardsize,
 680                         prot) != 0)
 681             goto mprot_error;
 682 #elif _STACK_GROWS_DOWN
 683           if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
 684                         prot) != 0)
 685             goto mprot_error;
 686 #elif _STACK_GROWS_UP
 687           if (mprotect ((char *) pd - pd->guardsize,
 688                         pd->guardsize - guardsize, prot) != 0)
 689             goto mprot_error;
 690 #endif
 691
 692           pd->guardsize = guardsize;
 693         }
 694       /* The pthread_getattr_np() calls need to get passed the size
 695          requested in the attribute, regardless of how large the
 696          actually used guardsize is.  */
 697       pd->reported_guardsize = guardsize;
 698     }
 699
 700   /* Initialize the lock.  We have to do this unconditionally since the
 701      stillborn thread could be canceled while the lock is taken.  */
 702   pd->lock = LLL_LOCK_INITIALIZER;
 703
 704   /* The robust mutex lists also need to be initialized
 705      unconditionally because the cleanup for the previous stack owner
 706      might have happened in the kernel.  */
 707   pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
 708                                   - offsetof (pthread_mutex_t,
 709                                               __data.__list.__next));
 710   pd->robust_head.list_op_pending = NULL;
 711 #ifdef __PTHREAD_MUTEX_HAVE_PREV
 712   pd->robust_prev = &pd->robust_head;
 713 #endif
 714   pd->robust_head.list = &pd->robust_head;
 715
 716   /* We place the thread descriptor at the end of the stack.  */
 717   *pdp = pd;
 718
 719 #if TLS_TCB_AT_TP
 720   /* The stack begins before the TCB and the static TLS block.  */
 721   stacktop = ((char *) (pd + 1) - __static_tls_size);
 722 #elif TLS_DTV_AT_TP
 723   stacktop = (char *) (pd - 1);
 724 #endif
 725
 726 #ifdef NEED_SEPARATE_REGISTER_STACK
 727   *stack = pd->stackblock;
 728   *stacksize = stacktop - *stack;
 729 #elif _STACK_GROWS_DOWN
 730   *stack = stacktop;
 731 #elif _STACK_GROWS_UP
 732   *stack = pd->stackblock;
 733   assert (*stack > 0);
 734 #endif
 735
 736   return 0;
 737 }
 738
 739
 740 void
 741 internal_function
 742 __deallocate_stack (struct pthread *pd)
 743 {
 744   lll_lock (stack_cache_lock, LLL_PRIVATE);
 745
 746   /* Remove the thread from the list of threads with user defined
 747      stacks.  */
 748   stack_list_del (&pd->list);
 749
 750   /* Not much to do.  Just free the mmap()ed memory.  Note that we do
 751      not reset the 'used' flag in the 'tid' field.  This is done by
 752      the kernel.  If no thread has been created yet this field is
 753      still zero.  */
 754   if (__glibc_likely (! pd->user_stack))
 755     (void) queue_stack (pd);
 756   else
 757     /* Free the memory associated with the ELF TLS.  */
 758     _dl_deallocate_tls (TLS_TPADJ (pd), false);
 759
 760   lll_unlock (stack_cache_lock, LLL_PRIVATE);
 761 }
 762
 763
 764 int
 765 internal_function
 766 __make_stacks_executable (void **stack_endp)
 767 {
 768   /* First the main thread's stack.  */
 769   int err = _dl_make_stack_executable (stack_endp);
 770   if (err != 0)
 771     return err;
 772
 773 #ifdef NEED_SEPARATE_REGISTER_STACK
 774   const size_t pagemask = ~(__getpagesize () - 1);
 775 #endif
 776
 777   lll_lock (stack_cache_lock, LLL_PRIVATE);
 778
 779   list_t *runp;
 780   list_for_each (runp, &stack_used)
 781     {
 782       err = change_stack_perm (list_entry (runp, struct pthread, list)
 783 #ifdef NEED_SEPARATE_REGISTER_STACK
 784                                , pagemask
 785 #endif
 786                                );
 787       if (err != 0)
 788         break;
 789     }
 790
 791   /* Also change the permission for the currently unused stacks.  This
 792      might be wasted time but better spend it here than adding a check
 793      in the fast path.  */
 794   if (err == 0)
 795     list_for_each (runp, &stack_cache)
 796       {
 797         err = change_stack_perm (list_entry (runp, struct pthread, list)
 798 #ifdef NEED_SEPARATE_REGISTER_STACK
 799                                  , pagemask
 800 #endif
 801                                  );
 802         if (err != 0)
 803           break;
 804       }
 805
 806   lll_unlock (stack_cache_lock, LLL_PRIVATE);
 807
 808   return err;
 809 }
 810
 811
 812 /* In case of a fork() call the memory allocation in the child will be
 813    the same but only one thread is running.  All stacks except that of
 814    the one running thread are not used anymore.  We have to recycle
 815    them.  */
void
__reclaim_stacks (void)
{
  /* Descriptor of the calling thread; every other descriptor found on
     the lists below is treated as no longer in use.  */
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking necessary.  The caller is the only stack in use.  But
     we have to be aware that we might have interrupted a list
     operation.  */

  if (in_flight_stack != 0)
    {
      /* in_flight_stack packs two things into one word: the low bit
         says whether the interrupted operation was an add (1) or a
         delete (0), and the remaining bits are the address of the
         list element that was being operated on.  */
      bool add_p = in_flight_stack & 1;
      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);

      if (add_p)
        {
          /* We always add at the beginning of the list.  So in this case we
             only need to check the beginning of these lists to see if the
             pointers at the head of the list are inconsistent.  */
          list_t *l = NULL;

          if (stack_used.next->prev != &stack_used)
            l = &stack_used;
          else if (stack_cache.next->prev != &stack_cache)
            l = &stack_cache;

          /* If neither head is inconsistent there is nothing to
             repair.  Otherwise finish linking ELEM in directly after
             the head L.  */
          if (l != NULL)
            {
              /* The old first element must already point back to ELEM;
                 anything else means the list is in a state this repair
                 code does not handle.  */
              assert (l->next->prev == elem);
              elem->next = l->next;
              elem->prev = l;
              l->next = elem;
            }
        }
      else
        {
          /* We can simply always replay the delete operation.  */
          elem->next->prev = elem->prev;
          elem->prev->next = elem->next;
        }
    }

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* The PID field must be initialized for the new process.  */
          curp->pid = self->pid;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;

          if (curp->specific_used)
            {
              /* Clear the thread-specific data.  */
              memset (curp->specific_1stblock, '\0',
                      sizeof (curp->specific_1stblock));

              curp->specific_used = false;

              /* Index 0 is skipped here; the first-level block was
                 already cleared above.
                 NOTE(review): the remaining blocks are cleared using
                 the size of specific_1stblock, which presumes all
                 blocks have that size -- confirm against the
                 allocation site.  */
              for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
                if (curp->specific[cnt] != NULL)
                  {
                    memset (curp->specific[cnt], '\0',
                            sizeof (curp->specific_1stblock));

                    /* We have allocated the block which we do not
                       free here so re-set the bit.  */
                    curp->specific_used = true;
                  }
            }
        }
    }

  /* Reset the PIDs in any cached stacks.  */
  list_for_each (runp, &stack_cache)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      curp->pid = self->pid;
    }

  /* Add the stack of all running threads to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the list it is
     currently on; it is re-added to one of the two lists of running
     threads below.  Which of the two lists is decided by the
     user_stack flag.  */
  stack_list_del (&self->list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
    list_add (&self->list, &__stack_user);
  else
    list_add (&self->list, &stack_used);

  /* There is one thread running.  */
  __nptl_nthreads = 1;

  /* Any interrupted list operation has been dealt with above.  */
  in_flight_stack = 0;

  /* Initialize locks.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
}
 929
 930
 931 #if HP_TIMING_AVAIL
 932 # undef __find_thread_by_id
 933 /* Find a thread given the thread ID.  */
 934 attribute_hidden
 935 struct pthread *
 936 __find_thread_by_id (pid_t tid)
 937 {
 938   struct pthread *result = NULL;
 939
 940   lll_lock (stack_cache_lock, LLL_PRIVATE);
 941
 942   /* Iterate over the list with system-allocated threads first.  */
 943   list_t *runp;
 944   list_for_each (runp, &stack_used)
 945     {
 946       struct pthread *curp;
 947
 948       curp = list_entry (runp, struct pthread, list);
 949
 950       if (curp->tid == tid)
 951         {
 952           result = curp;
 953           goto out;
 954         }
 955     }
 956
 957   /* Now the list with threads using user-allocated stacks.  */
 958   list_for_each (runp, &__stack_user)
 959     {
 960       struct pthread *curp;
 961
 962       curp = list_entry (runp, struct pthread, list);
 963
 964       if (curp->tid == tid)
 965         {
 966           result = curp;
 967           goto out;
 968         }
 969     }
 970
 971  out:
 972   lll_unlock (stack_cache_lock, LLL_PRIVATE);
 973
 974   return result;
 975 }
 976 #endif
 977
 978
 979 #ifdef SIGSETXID
/* Flag thread T as taking part in a set*id operation by setting
   SETXID_BITMASK in its cancelhandling word, unless T is already
   exiting.  CMDP is not used by this function.  */
static void
internal_function
setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  /* Wait until this thread is cloned.  */
  /* If the futex word is still -1, try to switch it to -2 and, when
     that exchange succeeds, block on the futex for as long as the
     word stays at -2.
     NOTE(review): this relies on atomic_compare_and_exchange_bool_acq
     returning zero when the exchange was performed -- confirm against
     the atomic.h in use.  */
  if (t->setxid_futex == -1
      && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
    do
      lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE);
    while (t->setxid_futex == -2);

  /* Don't let the thread exit before the setxid handler runs.  */
  t->setxid_futex = 0;

  /* Retry loop: re-read cancelhandling and attempt to OR in
     SETXID_BITMASK until the compare-and-exchange goes through or the
     thread turns out to be exiting.  */
  do
    {
      ch = t->cancelhandling;

      /* If the thread is exiting right now, ignore it.  */
      if ((ch & EXITING_BITMASK) != 0)
        {
          /* Release the futex if there is no other setxid in
             progress.  */
          if ((ch & SETXID_BITMASK) == 0)
            {
              t->setxid_futex = 1;
              lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
            }
          return;
        }
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch | SETXID_BITMASK, ch));
}
1016
1017
1018 static void
1019 internal_function
1020 setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
1021 {
1022   int ch;
1023
1024   do
1025     {
1026       ch = t->cancelhandling;
1027       if ((ch & SETXID_BITMASK) == 0)
1028         return;
1029     }
1030   while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1031                                                ch & ~SETXID_BITMASK, ch));
1032
1033   /* Release the futex just in case.  */
1034   t->setxid_futex = 1;
1035   lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
1036 }
1037
1038
1039 static int
1040 internal_function
1041 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
1042 {
1043   if ((t->cancelhandling & SETXID_BITMASK) == 0)
1044     return 0;
1045
1046   int val;
1047   INTERNAL_SYSCALL_DECL (err);
1048   val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
1049                           t->tid, SIGSETXID);
1050
1051   /* If this failed, it must have had not started yet or else exited.  */
1052   if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1053     {
1054       atomic_increment (&cmdp->cntr);
1055       return 1;
1056     }
1057   else
1058     return 0;
1059 }
1060
1061 /* Check for consistency across set*id system call results.  The abort
1062    should not happen as long as all privileges changes happen through
1063    the glibc wrappers.  ERROR must be 0 (no error) or an errno
1064    code.  */
1065 void
1066 attribute_hidden
1067 __nptl_setxid_error (struct xid_command *cmdp, int error)
1068 {
1069   do
1070     {
1071       int olderror = cmdp->error;
1072       if (olderror == error)
1073         break;
1074       if (olderror != -1)
1075         /* Mismatch between current and previous results.  */
1076         abort ();
1077     }
1078   while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1));
1079 }
1080
1081 int
1082 attribute_hidden
1083 __nptl_setxid (struct xid_command *cmdp)
1084 {
1085   int signalled;
1086   int result;
1087   lll_lock (stack_cache_lock, LLL_PRIVATE);
1088
1089   __xidcmd = cmdp;
1090   cmdp->cntr = 0;
1091   cmdp->error = -1;
1092
1093   struct pthread *self = THREAD_SELF;
1094
1095   /* Iterate over the list with system-allocated threads first.  */
1096   list_t *runp;
1097   list_for_each (runp, &stack_used)
1098     {
1099       struct pthread *t = list_entry (runp, struct pthread, list);
1100       if (t == self)
1101         continue;
1102
1103       setxid_mark_thread (cmdp, t);
1104     }
1105
1106   /* Now the list with threads using user-allocated stacks.  */
1107   list_for_each (runp, &__stack_user)
1108     {
1109       struct pthread *t = list_entry (runp, struct pthread, list);
1110       if (t == self)
1111         continue;
1112
1113       setxid_mark_thread (cmdp, t);
1114     }
1115
1116   /* Iterate until we don't succeed in signalling anyone.  That means
1117      we have gotten all running threads, and their children will be
1118      automatically correct once started.  */
1119   do
1120     {
1121       signalled = 0;
1122
1123       list_for_each (runp, &stack_used)
1124         {
1125           struct pthread *t = list_entry (runp, struct pthread, list);
1126           if (t == self)
1127             continue;
1128
1129           signalled += setxid_signal_thread (cmdp, t);
1130         }
1131
1132       list_for_each (runp, &__stack_user)
1133         {
1134           struct pthread *t = list_entry (runp, struct pthread, list);
1135           if (t == self)
1136             continue;
1137
1138           signalled += setxid_signal_thread (cmdp, t);
1139         }
1140
1141       int cur = cmdp->cntr;
1142       while (cur != 0)
1143         {
1144           lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
1145           cur = cmdp->cntr;
1146         }
1147     }
1148   while (signalled != 0);
1149
1150   /* Clean up flags, so that no thread blocks during exit waiting
1151      for a signal which will never come.  */
1152   list_for_each (runp, &stack_used)
1153     {
1154       struct pthread *t = list_entry (runp, struct pthread, list);
1155       if (t == self)
1156         continue;
1157
1158       setxid_unmark_thread (cmdp, t);
1159     }
1160
1161   list_for_each (runp, &__stack_user)
1162     {
1163       struct pthread *t = list_entry (runp, struct pthread, list);
1164       if (t == self)
1165         continue;
1166
1167       setxid_unmark_thread (cmdp, t);
1168     }
1169
1170   /* This must be last, otherwise the current thread might not have
1171      permissions to send SIGSETXID syscall to the other threads.  */
1172   INTERNAL_SYSCALL_DECL (err);
1173   result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
1174                                  cmdp->id[0], cmdp->id[1], cmdp->id[2]);
1175   int error = 0;
1176   if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
1177     {
1178       error = INTERNAL_SYSCALL_ERRNO (result, err);
1179       __set_errno (error);
1180       result = -1;
1181     }
1182   __nptl_setxid_error (cmdp, error);
1183
1184   lll_unlock (stack_cache_lock, LLL_PRIVATE);
1185   return result;
1186 }
1187 #endif  /* SIGSETXID.  */
1188
1189
1190 static inline void __attribute__((always_inline))
1191 init_one_static_tls (struct pthread *curp, struct link_map *map)
1192 {
1193   dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
1194 # if TLS_TCB_AT_TP
1195   void *dest = (char *) curp - map->l_tls_offset;
1196 # elif TLS_DTV_AT_TP
1197   void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1198 # else
1199 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1200 # endif
1201
1202   /* Fill in the DTV slot so that a later LD/GD access will find it.  */
1203   dtv[map->l_tls_modid].pointer.val = dest;
1204   dtv[map->l_tls_modid].pointer.is_static = true;
1205
1206   /* Initialize the memory.  */
1207   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1208           '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1209 }
1210
1211 void
1212 attribute_hidden
1213 __pthread_init_static_tls (struct link_map *map)
1214 {
1215   lll_lock (stack_cache_lock, LLL_PRIVATE);
1216
1217   /* Iterate over the list with system-allocated threads first.  */
1218   list_t *runp;
1219   list_for_each (runp, &stack_used)
1220     init_one_static_tls (list_entry (runp, struct pthread, list), map);
1221
1222   /* Now the list with threads using user-allocated stacks.  */
1223   list_for_each (runp, &__stack_user)
1224     init_one_static_tls (list_entry (runp, struct pthread, list), map);
1225
1226   lll_unlock (stack_cache_lock, LLL_PRIVATE);
1227 }
1228
1229
1230 void
1231 attribute_hidden
1232 __wait_lookup_done (void)
1233 {
1234   lll_lock (stack_cache_lock, LLL_PRIVATE);
1235
1236   struct pthread *self = THREAD_SELF;
1237
1238   /* Iterate over the list with system-allocated threads first.  */
1239   list_t *runp;
1240   list_for_each (runp, &stack_used)
1241     {
1242       struct pthread *t = list_entry (runp, struct pthread, list);
1243       if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1244         continue;
1245
1246       int *const gscope_flagp = &t->header.gscope_flag;
1247
1248       /* We have to wait until this thread is done with the global
1249          scope.  First tell the thread that we are waiting and
1250          possibly have to be woken.  */
1251       if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1252                                                 THREAD_GSCOPE_FLAG_WAIT,
1253                                                 THREAD_GSCOPE_FLAG_USED))
1254         continue;
1255
1256       do
1257         lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1258       while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1259     }
1260
1261   /* Now the list with threads using user-allocated stacks.  */
1262   list_for_each (runp, &__stack_user)
1263     {
1264       struct pthread *t = list_entry (runp, struct pthread, list);
1265       if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1266         continue;
1267
1268       int *const gscope_flagp = &t->header.gscope_flag;
1269
1270       /* We have to wait until this thread is done with the global
1271          scope.  First tell the thread that we are waiting and
1272          possibly have to be woken.  */
1273       if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1274                                                 THREAD_GSCOPE_FLAG_WAIT,
1275                                                 THREAD_GSCOPE_FLAG_USED))
1276         continue;
1277
1278       do
1279         lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1280       while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1281     }
1282
1283   lll_unlock (stack_cache_lock, LLL_PRIVATE);
1284 }