Initial revision
[glibc.git] / nptl / allocatestack.c
blob 2aaaaeca9c286849335ce23688858a243126b79d
/* Copyright (C) 2002 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <tls.h>
/* Most architectures have exactly one stack pointer.  Some have more.  */
#define STACK_VARIABLES void *stackaddr

/* How to pass the values to the 'create_thread' function.  */
#define STACK_VARIABLES_ARGS stackaddr

/* How to declare the function which gets these parameters.  */
#define STACK_VARIABLES_PARMS void *stackaddr

/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif
/* Cache handling for not-yet freed stacks.  */

/* Maximum size of the cache, in bytes.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
static size_t stack_cache_actsize;

/* Mutex protecting the stack cache and the lists below.  */
static lll_lock_t stack_cache_lock = LLL_LOCK_INITIALIZER;

/* List of queued stacks.  */
static LIST_HEAD (stack_cache);

/* List of the stacks in use.  */
static LIST_HEAD (stack_used);

/* List of the threads with user provided stacks in use.  */
LIST_HEAD (__stack_user);

/* Number of threads running.  */
static unsigned int nptl_nthreads = 1;

/* Check whether the stack is still used or not.  */
#define FREE_P(descr) ((descr)->tid == 0)
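
/* Editor's note: a stack counts as free once its descriptor's 'tid'
   field is zero.  As the comment in __deallocate_stack below says, for
   kernel-created threads the kernel itself resets this field when the
   thread exits (presumably via the clear-child-tid mechanism), so a
   zero 'tid' reliably marks a stack that may be reused or unmapped.  */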

/* We create a doubly linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */


/* Get a stack from the cache.  We have to match by size since
   some blocks might be too small or far too large.  */
static struct pthread *
get_cached_stack (size_t *sizep, void **memp)
{
  size_t size = *sizep;
  struct pthread *result = NULL;
  list_t *entry;

  lll_lock (stack_cache_lock);

  /* Search the cache for a matching entry.  We search for the
     smallest stack which has at least the required size.  Note that
     in normal situations the size of all allocated stacks is the
     same.  In the worst case there are only a few different sizes.
     Therefore this loop will exit early most of the time with an
     exact match.  */
  list_for_each (entry, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, header.data.list);
      if (FREE_P (curr) && curr->stackblock_size >= size)
        {
          if (curr->stackblock_size == size)
            {
              result = curr;
              break;
            }

          /* Prefer the smallest block which is still large enough.
             The NULL check avoids dereferencing RESULT before a first
             candidate has been found.  */
          if (result == NULL
              || result->stackblock_size > curr->stackblock_size)
            result = curr;
        }
    }

  if (__builtin_expect (result == NULL, 0)
      /* Make sure the size difference is not too excessive.  In that
         case we do not use the block.  */
      || __builtin_expect (result->stackblock_size > 4 * size, 0))
    {
      /* Release the lock.  */
      lll_unlock (stack_cache_lock);

      return NULL;
    }

  /* Dequeue the entry.  */
  list_del (&result->header.data.list);

  /* And add to the list of stacks in use.  */
  list_add (&result->header.data.list, &stack_used);

  /* One more thread.  */
  ++nptl_nthreads;

  /* And decrease the cache size.  */
  stack_cache_actsize -= result->stackblock_size;

  /* Release the lock early.  */
  lll_unlock (stack_cache_lock);

  *sizep = result->stackblock_size;
  *memp = result->stackblock;

  /* Cancellation handling is back to the default.  */
  result->cancelhandling = 0;
  result->cleanup = NULL;

  /* No pending event.  */
  result->nextevent = NULL;

  /* Clear the DTV.  */
  dtv_t *dtv = GET_DTV (result);
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
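
  /* Editor's note: in glibc's DTV layout, dtv[-1].counter records how
     many slots the DTV has, so the memset above wipes the generation
     counter in dtv[0] and all per-module slots while leaving the
     length word at dtv[-1] intact.  */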

  /* Re-initialize the TLS.  */
  return _dl_allocate_tls_init (result);
}

/* Add a stack which is not used anymore to the cache.  Must be
   called with the cache lock held.  */
static void
queue_stack (struct pthread *stack)
{
  /* We unconditionally add the stack to the list.  The memory may
     still be in use but it will not be reused until the kernel marks
     the stack as not used anymore.  */
  list_add (&stack->header.data.list, &stack_cache);

  stack_cache_actsize += stack->stackblock_size;
  if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
    {
      /* We reduce the size of the cache.  Remove the last entries
         until the size is below the limit.  */
      list_t *entry;
      list_t *prev;

      /* Search from the end of the list.  */
      list_for_each_prev_safe (entry, prev, &stack_cache)
        {
          struct pthread *curr;

          curr = list_entry (entry, struct pthread, header.data.list);
          if (FREE_P (curr))
            {
              /* Unlink the block.  */
              list_del (entry);

              /* Account for the freed memory.  */
              stack_cache_actsize -= curr->stackblock_size;

              /* Free the memory associated with the ELF TLS.  */
              _dl_deallocate_tls (curr, false);

              /* Remove this block.  This should never fail.  If it
                 does, something is really wrong.  */
              if (munmap (curr->stackblock, curr->stackblock_size) != 0)
                abort ();

              /* Maybe we have freed enough.  */
              if (stack_cache_actsize <= stack_cache_maxsize)
                break;
            }
        }
    }
}

static int
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                void **stack)
{
  struct pthread *pd;
  size_t size;
  size_t pagesize = __sysconf (_SC_PAGESIZE);

  assert (attr != NULL);
  assert (powerof2 (pagesize));
  assert (TCB_ALIGNMENT >= STACK_ALIGN);

  /* Get the stack size from the attribute if it is set.  Otherwise we
     use the default we determined at start time.  */
  size = attr->stacksize ?: __default_stacksize;

  /* Get memory for the stack.  */
  if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
    {
      uintptr_t adj;

      /* If the user also specified the size of the stack make sure it
         is large enough.  */
      if (attr->stacksize != 0
          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
        return EINVAL;

      /* Adjust stack size for alignment of the TLS block.  */
      adj = ((uintptr_t) attr->stackaddr) & (__static_tls_align - 1);
      assert (size > adj);

      /* The user provided some memory.  Let's hope it matches the
         size...  We do not allocate guard pages if the user provided
         the stack.  It is the user's responsibility to do this if it
         is wanted.  */
      pd = (struct pthread *) (((uintptr_t) attr->stackaddr - adj)
                               & ~(__alignof (struct pthread) - 1)) - 1;
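
      /* Illustrative example (editor's note, made-up numbers): if the
         user passed stackaddr == 0xb0001003 and __static_tls_align is
         16, then adj == 3, the TLS-aligned top is 0xb0001000, and PD
         ends up one 'struct pthread' below that address, after rounding
         it down to __alignof (struct pthread).  */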

      /* The user-provided memory is not necessarily zeroed, so the
         descriptor must be cleared explicitly.  */
      memset (pd, '\0', sizeof (struct pthread));

      /* The first TSD block is included in the TCB.  */
      pd->specific[0] = pd->specific_1stblock;

      /* Initialize the lock.  */
      pd->lock = LLL_LOCK_INITIALIZER;

      /* Remember the stack-related values.  Signal that this stack
         must not be put into the stack cache.  */
      pd->stackblock = (char *) attr->stackaddr - size;
      pd->stackblock_size = size - adj;

      /* This is a user-provided stack.  */
      pd->user_stack = true;

      /* Allocate the DTV for this thread.  */
      if (_dl_allocate_tls (pd) == NULL)
        /* Something went wrong.  */
        return errno;

      lll_lock (stack_cache_lock);

      /* And add to the list of threads with user-provided stacks.  */
      list_add (&pd->header.data.list, &__stack_user);

      /* One more thread.  */
      ++nptl_nthreads;

      lll_unlock (stack_cache_lock);
    }
  else
    {
      /* Allocate some anonymous memory.  If possible use the
         cache.  */
      size_t guardsize;
      size_t reqsize;
      void *mem;

      /* Adjust the stack size for alignment.  */
      size &= ~(__static_tls_align - 1);
      assert (size != 0);

      /* Make sure the size of the stack is enough for the guard and
         possibly the thread descriptor.  */
      guardsize = (attr->guardsize + pagesize - 1) & ~(pagesize - 1);
      if (__builtin_expect (size < (guardsize + __static_tls_size
                                    + MINIMAL_REST_STACK), 0))
        /* The stack is too small (or the guard too large).  */
        return EINVAL;
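
      /* Illustrative arithmetic (editor's note): with a 4096-byte page
         size, a requested guardsize of 1 rounds up to 4096 and a
         guardsize of 0 stays 0, so the guard always covers whole pages
         as mprotect requires.  */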

      reqsize = size;
      pd = get_cached_stack (&size, &mem);
      if (pd == NULL)
        {
          mem = mmap (NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

          if (__builtin_expect (mem == MAP_FAILED, 0))
            return errno;

          /* 'size' is guaranteed to be greater than zero.  So we can
             never get a NULL pointer back from mmap.  */
          assert (mem != NULL);

          /* Place the thread descriptor at the end of the stack.  */
          pd = (struct pthread *) ((char *) mem + size) - 1;
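
          /* Editor's note: this puts the descriptor in the
             highest-addressed bytes of the mapping; the code below and
             the TLS_TCB_AT_TP case at the end of this function assume a
             stack that grows downwards from there towards MEM.  */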

          /* Remember the stack-related values.  */
          pd->stackblock = mem;
          pd->stackblock_size = size;

          /* We allocated the first block of the thread-specific data
             array.  This address will not change for the lifetime of
             this descriptor.  */
          pd->specific[0] = pd->specific_1stblock;

          /* Initialize the lock.  */
          pd->lock = LLL_LOCK_INITIALIZER;

          /* Allocate the DTV for this thread.  */
          if (_dl_allocate_tls (pd) == NULL)
            {
              /* Something went wrong.  */
              int err = errno;

              /* Free the stack memory we just allocated.  */
              munmap (mem, size);

              return err;
            }

          lll_lock (stack_cache_lock);

          /* And add to the list of stacks in use.  */
          list_add (&pd->header.data.list, &stack_used);

          /* One more thread.  */
          ++nptl_nthreads;

          lll_unlock (stack_cache_lock);

          /* Note that all of the stack and the thread descriptor is
             zeroed.  This means we do not have to initialize fields
             with initial value zero.  This is specifically true for
             the 'tid' field which is always set back to zero once the
             stack is not used anymore and for the 'guardsize' field
             which will be read next.  */
        }

      /* Create or resize the guard area if necessary.  */
      if (__builtin_expect (guardsize > pd->guardsize, 0))
        {
          if (mprotect (mem, guardsize, PROT_NONE) != 0)
            {
              int err;
            mprot_error:
              err = errno;

              lll_lock (stack_cache_lock);

              /* Remove the thread from the list.  */
              list_del (&pd->header.data.list);

              /* The thread is gone.  */
              --nptl_nthreads;

              lll_unlock (stack_cache_lock);

              /* Free the memory regardless of whether the size of the
                 cache is over the limit or not.  If this piece of
                 memory caused problems we better do not use it
                 anymore.  Uh, and we ignore possible errors.  There
                 is nothing we could do.  */
              (void) munmap (mem, size);

              return err;
            }

          pd->guardsize = guardsize;
        }
      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
                                 0))
        {
          /* The old guard area is too large.  */
          if (mprotect ((char *) mem + guardsize,
                        pd->guardsize - guardsize,
                        PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
            goto mprot_error;

          pd->guardsize = guardsize;
        }
    }

  /* We place the thread descriptor at the end of the stack.  */
  *pdp = pd;

#if TLS_TCB_AT_TP
  /* The stack begins before the TCB and the static TLS block.  */
  *stack = ((char *) (pd + 1) - __static_tls_size);
#else
# error "Implement me"
#endif
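
  /* Editor's sketch of the resulting TLS_TCB_AT_TP layout for a
     downward-growing stack (boundaries illustrative, not part of the
     original code):

       mem                                                   mem + size
        | guard (PROT_NONE) | usable stack ... | static TLS incl. TCB |
                                               ^                  ^
                                            *stack                pd

     *stack marks the highest usable stack address; PD sits at the very
     top of the block, within the static TLS area.  */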

  return 0;
}

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
#define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
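
/* Illustrative use (editor's sketch, not part of this file): a caller
   such as pthread_create would do roughly

     STACK_VARIABLES;
     struct pthread *pd;
     int err = ALLOCATE_STACK (attr, &pd);
     if (err != 0)
       return err;

   so that STACK_VARIABLES declares whatever stack pointer variables the
   architecture needs and ALLOCATE_STACK forwards them via
   STACK_VARIABLES_ARGS.  */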

void
__deallocate_stack (struct pthread *pd)
{
  lll_lock (stack_cache_lock);

  /* Remove the thread from whichever list of stacks it is on.  */
  list_del (&pd->header.data.list);

  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
     not reset the 'used' flag in the 'tid' field.  This is done by
     the kernel.  If no thread has been created yet this field is
     still zero.  */
  if (__builtin_expect (! pd->user_stack, 1))
    (void) queue_stack (pd);
  else
    /* Free the memory associated with the ELF TLS.  */
    _dl_deallocate_tls (pd, false);

  /* One less thread.  */
  --nptl_nthreads;

  lll_unlock (stack_cache_lock);
}

/* In case of a fork() call the memory allocation in the child will be
   the same but only one thread is running.  All stacks except that of
   the one running thread are not used anymore.  We have to recycle
   them.  */
void
__reclaim_stacks (void)
{
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking necessary.  The caller's thread is the only one running.  */

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, header.data.list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;
        }
    }

  /* Add the stacks of all threads that were running to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the cache list
     and add it to the list of running threads.  Which of the two
     lists is decided by the user_stack flag.  */
  list_del (&self->header.data.list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
    list_add (&self->header.data.list, &__stack_user);
  else
    list_add (&self->header.data.list, &stack_used);

  /* There is one thread running.  */
  nptl_nthreads = 1;

  /* Initialize the lock.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
}