nptl/allocatestack.c
/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <dl-sysdep.h>
#include <dl-tls.h>
#include <tls.h>
#include <list.h>
#include <lowlevellock.h>
#include <futex-internal.h>
#include <kernel-features.h>
#include <stack-aliasing.h>
#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr = NULL

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare the function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

/* We need two stacks.  The kernel will place them but we have to tell
   the kernel about the size of the reserved address space.  */
# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr, stacksize

/* How to declare the function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif
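
/* NEED_SEPARATE_REGISTER_STACK covers architectures (IA-64 being the
   usual example) whose register backing store grows upward inside the
   same mapping while the ordinary stack grows downward, so a single
   allocated block has to be split between the two.  */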

/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif


/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
   a stack.  Use it when possible.  */
#ifndef MAP_STACK
# define MAP_STACK 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif
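
/* In other words: with TLS_TCB_AT_TP the thread pointer is the descriptor
   itself (struct pthread starts with the TCB), while with TLS_DTV_AT_TP
   the thread pointer lies TLS_PRE_TCB_SIZE bytes above the descriptor,
   so TLS_TPADJ converts a descriptor into the address the TLS helper
   functions expect.  */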

/* Cache handling for not-yet free stacks.  */

/* Maximum size of the stack cache, in bytes.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
static size_t stack_cache_actsize;

/* Lock protecting the stack cache, the lists below, and the size
   counters above.  */
static int stack_cache_lock = LLL_LOCK_INITIALIZER;

/* List of queued stack frames.  */
static LIST_HEAD (stack_cache);

/* List of the stacks in use.  */
static LIST_HEAD (stack_used);

/* We need to record what list operations we are going to do so that,
   in case of an asynchronous interruption due to a fork() call, we
   can correct for the work.  */
static uintptr_t in_flight_stack;
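
/* The value stored in in_flight_stack is the list element being operated
   on, with the low bit set for an add and clear for a delete.  After a
   fork, __reclaim_stacks uses this record to replay or undo the
   interrupted list operation.  */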

/* List of the threads with user provided stacks in use.  No need to
   initialize this, since it's done in __pthread_initialize_minimal.  */
list_t __stack_user __attribute__ ((nocommon));
hidden_data_def (__stack_user)

#if COLORING_INCREMENT != 0
/* Number of threads created.  */
static unsigned int nptl_ncreated;
#endif


/* Check whether the stack is still used or not.  */
#define FREE_P(descr) ((descr)->tid <= 0)
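
/* The TID is cleared by the kernel (via the CLONE_CHILD_CLEARTID
   mechanism) when the thread exits, so a non-positive TID means the
   descriptor and its stack block are no longer in use.  */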

static void
stack_list_del (list_t *elem)
{
  in_flight_stack = (uintptr_t) elem;

  atomic_write_barrier ();

  list_del (elem);

  atomic_write_barrier ();

  in_flight_stack = 0;
}


static void
stack_list_add (list_t *elem, list_t *list)
{
  in_flight_stack = (uintptr_t) elem | 1;

  atomic_write_barrier ();

  list_add (elem, list);

  atomic_write_barrier ();

  in_flight_stack = 0;
}

/* We create a double linked list of all cache entries.  Double linked
   because this allows removing entries from the end.  */


/* Get a stack frame from the cache.  We have to match by size since
   some blocks might be too small or far too large.  */
static struct pthread *
get_cached_stack (size_t *sizep, void **memp)
{
  size_t size = *sizep;
  struct pthread *result = NULL;
  list_t *entry;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Search the cache for a matching entry.  We search for the
     smallest stack which has at least the required size.  Note that
     in normal situations the size of all allocated stacks is the
     same.  At the very least there are only a few different sizes.
     Therefore this loop will exit early most of the time with an
     exact match.  */
  list_for_each (entry, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr) && curr->stackblock_size >= size)
        {
          if (curr->stackblock_size == size)
            {
              result = curr;
              break;
            }

          if (result == NULL
              || result->stackblock_size > curr->stackblock_size)
            result = curr;
        }
    }

  if (__builtin_expect (result == NULL, 0)
      /* Make sure the size difference is not too excessive.  In that
         case we do not use the block.  */
      || __builtin_expect (result->stackblock_size > 4 * size, 0))
    {
      /* Release the lock.  */
      lll_unlock (stack_cache_lock, LLL_PRIVATE);

      return NULL;
    }

  /* Don't allow setxid until cloned.  */
  result->setxid_futex = -1;

  /* Dequeue the entry.  */
  stack_list_del (&result->list);

  /* And add to the list of stacks in use.  */
  stack_list_add (&result->list, &stack_used);

  /* And decrease the cache size.  */
  stack_cache_actsize -= result->stackblock_size;

  /* Release the lock early.  */
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  /* Report size and location of the stack to the caller.  */
  *sizep = result->stackblock_size;
  *memp = result->stackblock;

  /* Cancellation handling is back to the default.  */
  result->cancelhandling = 0;
  result->cleanup = NULL;

  /* No pending event.  */
  result->nextevent = NULL;

  /* Clear the DTV.  */
  dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    if (! dtv[1 + cnt].pointer.is_static
        && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
      free (dtv[1 + cnt].pointer.val);
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));

  /* Re-initialize the TLS.  */
  _dl_allocate_tls_init (TLS_TPADJ (result));

  return result;
}

/* Free stacks until cache size is lower than LIMIT.  */
void
__free_stacks (size_t limit)
{
  /* We reduce the size of the cache.  Remove the last entries until
     the size is below the limit.  */
  list_t *entry;
  list_t *prev;

  /* Search from the end of the list.  */
  list_for_each_prev_safe (entry, prev, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr))
        {
          /* Unlink the block.  */
          stack_list_del (entry);

          /* Account for the freed memory.  */
          stack_cache_actsize -= curr->stackblock_size;

          /* Free the memory associated with the ELF TLS.  */
          _dl_deallocate_tls (TLS_TPADJ (curr), false);

          /* Remove this block.  This should never fail.  If it does
             something is really wrong.  */
          if (munmap (curr->stackblock, curr->stackblock_size) != 0)
            abort ();

          /* Maybe we have freed enough.  */
          if (stack_cache_actsize <= limit)
            break;
        }
    }
}

/* Add a stack frame which is not used anymore to the stack cache.  Must
   be called with the cache lock held.  */
static inline void
__attribute ((always_inline))
queue_stack (struct pthread *stack)
{
  /* We unconditionally add the stack to the list.  The memory may
     still be in use but it will not be reused until the kernel marks
     the stack as not used anymore.  */
  stack_list_add (&stack->list, &stack_cache);

  stack_cache_actsize += stack->stackblock_size;
  if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
    __free_stacks (stack_cache_maxsize);
}

static int
internal_function
change_stack_perm (struct pthread *pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                   , size_t pagemask
#endif
                   )
{
#ifdef NEED_SEPARATE_REGISTER_STACK
  void *stack = (pd->stackblock
                 + (((((pd->stackblock_size - pd->guardsize) / 2)
                      & pagemask) + pd->guardsize) & pagemask));
  size_t len = pd->stackblock + pd->stackblock_size - stack;
#elif _STACK_GROWS_DOWN
  void *stack = pd->stackblock + pd->guardsize;
  size_t len = pd->stackblock_size - pd->guardsize;
#elif _STACK_GROWS_UP
  void *stack = pd->stackblock;
  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
#else
# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
#endif
  if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
    return errno;

  return 0;
}

/* Returns a usable stack for a new thread either by allocating a
   new stack or reusing a cached stack of sufficient size.
   ATTR must be non-NULL and point to a valid pthread_attr.
   PDP must be non-NULL.  */
static int
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                ALLOCATE_STACK_PARMS)
{
  struct pthread *pd;
  size_t size;
  size_t pagesize_m1 = __getpagesize () - 1;

  assert (powerof2 (pagesize_m1 + 1));
  assert (TCB_ALIGNMENT >= STACK_ALIGN);

  /* Get the stack size from the attribute if it is set.  Otherwise we
     use the default we determined at start time.  */
  if (attr->stacksize != 0)
    size = attr->stacksize;
  else
    {
      lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
      size = __default_pthread_attr.stacksize;
      lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
    }

  /* Get memory for the stack.  */
  if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
    {
      uintptr_t adj;
      char *stackaddr = (char *) attr->stackaddr;

      /* Assume the same layout as the _STACK_GROWS_DOWN case, with struct
         pthread at the top of the stack block.  Later we adjust the guard
         location and stack address to match the _STACK_GROWS_UP case.  */
      if (_STACK_GROWS_UP)
        stackaddr += attr->stacksize;

      /* If the user also specified the size of the stack make sure it
         is large enough.  */
      if (attr->stacksize != 0
          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
        return EINVAL;

      /* Adjust stack size for alignment of the TLS block.  */
#if TLS_TCB_AT_TP
      adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
            & __static_tls_align_m1;
      assert (size > adj + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
      adj = ((uintptr_t) stackaddr - __static_tls_size)
            & __static_tls_align_m1;
      assert (size > adj);
#endif

      /* The user provided some memory.  Let's hope it matches the
         size...  We do not allocate guard pages if the user provided
         the stack.  It is the user's responsibility to do this if it
         is wanted.  */
#if TLS_TCB_AT_TP
      pd = (struct pthread *) ((uintptr_t) stackaddr
                               - TLS_TCB_SIZE - adj);
#elif TLS_DTV_AT_TP
      pd = (struct pthread *) (((uintptr_t) stackaddr
                                - __static_tls_size - adj)
                               - TLS_PRE_TCB_SIZE);
#endif

      /* The user provided stack memory needs to be cleared.  */
      memset (pd, '\0', sizeof (struct pthread));

      /* The first TSD block is included in the TCB.  */
      pd->specific[0] = pd->specific_1stblock;

      /* Remember the stack-related values.  */
      pd->stackblock = (char *) stackaddr - size;
      pd->stackblock_size = size;

      /* This is a user-provided stack.  It will not be queued in the
         stack cache nor will the memory (except the TLS memory) be freed.  */
      pd->user_stack = true;

      /* This is at least the second thread.  */
      pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
      __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
      /* The thread must know when private futexes are supported.  */
      pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
      SETUP_THREAD_SYSINFO (pd);
#endif

      /* The process ID is also the same as that of the caller.  */
      pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

      /* Don't allow setxid until cloned.  */
      pd->setxid_futex = -1;

      /* Allocate the DTV for this thread.  */
      if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
        {
          /* Something went wrong.  */
          assert (errno == ENOMEM);
          return errno;
        }


      /* Prepare to modify global data.  */
      lll_lock (stack_cache_lock, LLL_PRIVATE);

      /* And add to the list of stacks in use.  */
      list_add (&pd->list, &__stack_user);

      lll_unlock (stack_cache_lock, LLL_PRIVATE);
    }
  else
    {
      /* Allocate some anonymous memory.  If possible use the cache.  */
      size_t guardsize;
      size_t reqsize;
      void *mem;
      const int prot = (PROT_READ | PROT_WRITE
                        | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

#if COLORING_INCREMENT != 0
      /* Add one more page for stack coloring.  Don't do it for stacks
         with 16 times pagesize or larger.  This might just cause
         unnecessary misalignment.  */
      if (size <= 16 * pagesize_m1)
        size += pagesize_m1 + 1;
#endif

      /* Adjust the stack size for alignment.  */
      size &= ~__static_tls_align_m1;
      assert (size != 0);

      /* Make sure the size of the stack is enough for the guard and
         eventually the thread descriptor.  */
      guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
      if (__builtin_expect (size < ((guardsize + __static_tls_size
                                     + MINIMAL_REST_STACK + pagesize_m1)
                                    & ~pagesize_m1),
                            0))
        /* The stack is too small (or the guard too large).  */
        return EINVAL;

      /* Try to get a stack from the cache.  */
      reqsize = size;
      pd = get_cached_stack (&size, &mem);
      if (pd == NULL)
        {
          /* To avoid aliasing effects on a larger scale than pages we
             adjust the allocated stack size if necessary.  This way
             allocations directly following each other will not have
             aliasing problems.  */
#if MULTI_PAGE_ALIASING != 0
          if ((size % MULTI_PAGE_ALIASING) == 0)
            size += pagesize_m1 + 1;
#endif

          mem = mmap (NULL, size, prot,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);

          if (__glibc_unlikely (mem == MAP_FAILED))
            return errno;

          /* SIZE is guaranteed to be greater than zero.
             So we can never get a null pointer back from mmap.  */
          assert (mem != NULL);

#if COLORING_INCREMENT != 0
          /* Atomically increment NCREATED.  */
          unsigned int ncreated = atomic_increment_val (&nptl_ncreated);

          /* We choose the offset for coloring by incrementing it for
             every new thread by a fixed amount.  The offset is used
             modulo the page size.  Even if coloring would be better
             relative to higher alignment values it makes no sense to
             do it since the mmap() interface does not allow us to
             specify any alignment for the returned memory block.  */
          size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;

          /* Make sure the coloring offset does not disturb the alignment
             of the TCB and static TLS block.  */
          if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
            coloring = (((coloring + __static_tls_align_m1)
                         & ~(__static_tls_align_m1))
                        & ~pagesize_m1);
#else
          /* Unless specified we do not make any adjustments.  */
# define coloring 0
#endif

          /* Place the thread descriptor at the end of the stack.  */
#if TLS_TCB_AT_TP
          pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
#elif TLS_DTV_AT_TP
          pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
                                    - __static_tls_size)
                                    & ~__static_tls_align_m1)
                                   - TLS_PRE_TCB_SIZE);
#endif

          /* Remember the stack-related values.  */
          pd->stackblock = mem;
          pd->stackblock_size = size;

          /* We allocated the first block of the thread-specific data
             array.  This address will not change for the lifetime of
             this descriptor.  */
          pd->specific[0] = pd->specific_1stblock;

          /* This is at least the second thread.  */
          pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
          __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
          /* The thread must know when private futexes are supported.  */
          pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                    header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
          SETUP_THREAD_SYSINFO (pd);
#endif

          /* Don't allow setxid until cloned.  */
          pd->setxid_futex = -1;

          /* The process ID is also the same as that of the caller.  */
          pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

          /* Allocate the DTV for this thread.  */
          if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
            {
              /* Something went wrong.  */
              assert (errno == ENOMEM);

              /* Free the stack memory we just allocated.  */
              (void) munmap (mem, size);

              return errno;
            }


          /* Prepare to modify global data.  */
          lll_lock (stack_cache_lock, LLL_PRIVATE);

          /* And add to the list of stacks in use.  */
          stack_list_add (&pd->list, &stack_used);

          lll_unlock (stack_cache_lock, LLL_PRIVATE);


          /* There might have been a race.  Another thread might have
             caused the stacks to get exec permission while this new
             stack was prepared.  Detect if this was possible and
             change the permission if necessary.  */
          if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
                                && (prot & PROT_EXEC) == 0, 0))
            {
              int err = change_stack_perm (pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                                           , ~pagesize_m1
#endif
                                           );
              if (err != 0)
                {
                  /* Free the stack memory we just allocated.  */
                  (void) munmap (mem, size);

                  return err;
                }
            }
        }

      /* Note that all of the stack and the thread descriptor is
         zeroed.  This means we do not have to initialize fields
         with initial value zero.  This is specifically true for
         the 'tid' field which is always set back to zero once the
         stack is not used anymore and for the 'guardsize' field
         which will be read next.  */

      /* Create or resize the guard area if necessary.  */
      if (__glibc_unlikely (guardsize > pd->guardsize))
        {
#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
#elif _STACK_GROWS_DOWN
          char *guard = mem;
#elif _STACK_GROWS_UP
          char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
#endif
          if (mprotect (guard, guardsize, PROT_NONE) != 0)
            {
            mprot_error:
              lll_lock (stack_cache_lock, LLL_PRIVATE);

              /* Remove the thread from the list.  */
              stack_list_del (&pd->list);

              lll_unlock (stack_cache_lock, LLL_PRIVATE);

              /* Get rid of the TLS block we allocated.  */
              _dl_deallocate_tls (TLS_TPADJ (pd), false);

              /* Free the stack memory regardless of whether the size
                 of the cache is over the limit or not.  If this piece
                 of memory caused problems we had better not use it
                 anymore.  We also ignore possible errors; there is
                 nothing we could do.  */
              (void) munmap (mem, size);

              return errno;
            }

          pd->guardsize = guardsize;
        }
      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
                                 0))
        {
          /* The old guard area is too large.  */

#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
          char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);

          if (oldguard < guard
              && mprotect (oldguard, guard - oldguard, prot) != 0)
            goto mprot_error;

          if (mprotect (guard + guardsize,
                        oldguard + pd->guardsize - guard - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_DOWN
          if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_UP
          if (mprotect ((char *) pd - pd->guardsize,
                        pd->guardsize - guardsize, prot) != 0)
            goto mprot_error;
#endif

          pd->guardsize = guardsize;
        }
      /* The pthread_getattr_np() calls need to be passed the size
         requested in the attribute, regardless of how large the
         actually used guardsize is.  */
      pd->reported_guardsize = guardsize;
    }

  /* Initialize the lock.  We have to do this unconditionally since the
     stillborn thread could be canceled while the lock is taken.  */
  pd->lock = LLL_LOCK_INITIALIZER;

  /* The robust mutex lists also need to be initialized
     unconditionally because the cleanup for the previous stack owner
     might have happened in the kernel.  */
  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
                                  - offsetof (pthread_mutex_t,
                                              __data.__list.__next));
  pd->robust_head.list_op_pending = NULL;
#ifdef __PTHREAD_MUTEX_HAVE_PREV
  pd->robust_prev = &pd->robust_head;
#endif
  pd->robust_head.list = &pd->robust_head;

  /* We place the thread descriptor at the end of the stack.  */
  *pdp = pd;

#if _STACK_GROWS_DOWN
  void *stacktop;

# if TLS_TCB_AT_TP
  /* The stack begins before the TCB and the static TLS block.  */
  stacktop = ((char *) (pd + 1) - __static_tls_size);
# elif TLS_DTV_AT_TP
  stacktop = (char *) (pd - 1);
# endif

# ifdef NEED_SEPARATE_REGISTER_STACK
  *stack = pd->stackblock;
  *stacksize = stacktop - *stack;
# else
  *stack = stacktop;
# endif
#else
  *stack = pd->stackblock;
#endif

  return 0;
}

void
internal_function
__deallocate_stack (struct pthread *pd)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Remove the thread from the list of threads with user defined
     stacks.  */
  stack_list_del (&pd->list);

  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
     not reset the 'used' flag in the 'tid' field.  This is done by
     the kernel.  If no thread has been created yet this field is
     still zero.  */
  if (__glibc_likely (! pd->user_stack))
    (void) queue_stack (pd);
  else
    /* Free the memory associated with the ELF TLS.  */
    _dl_deallocate_tls (TLS_TPADJ (pd), false);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}

int
internal_function
__make_stacks_executable (void **stack_endp)
{
  /* First the main thread's stack.  */
  int err = _dl_make_stack_executable (stack_endp);
  if (err != 0)
    return err;

#ifdef NEED_SEPARATE_REGISTER_STACK
  const size_t pagemask = ~(__getpagesize () - 1);
#endif

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                               , pagemask
#endif
                               );
      if (err != 0)
        break;
    }

  /* Also change the permission for the currently unused stacks.  This
     might be wasted time but better spend it here than adding a check
     in the fast path.  */
  if (err == 0)
    list_for_each (runp, &stack_cache)
      {
        err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                                 , pagemask
#endif
                                 );
        if (err != 0)
          break;
      }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return err;
}

/* In case of a fork() call the memory allocation in the child will be
   the same but only one thread is running.  All stacks except that of
   the one running thread are not used anymore.  We have to recycle
   them.  */
void
__reclaim_stacks (void)
{
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking necessary.  The caller is the only stack in use.  But
     we have to be aware that we might have interrupted a list
     operation.  */

  if (in_flight_stack != 0)
    {
      bool add_p = in_flight_stack & 1;
      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);

      if (add_p)
        {
          /* We always add at the beginning of the list.  So in this case we
             only need to check the beginning of these lists to see if the
             pointers at the head of the list are inconsistent.  */
          list_t *l = NULL;

          if (stack_used.next->prev != &stack_used)
            l = &stack_used;
          else if (stack_cache.next->prev != &stack_cache)
            l = &stack_cache;

          if (l != NULL)
            {
              assert (l->next->prev == elem);
              elem->next = l->next;
              elem->prev = l;
              l->next = elem;
            }
        }
      else
        {
          /* We can simply always replay the delete operation.  */
          elem->next->prev = elem->prev;
          elem->prev->next = elem->next;
        }
    }

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* The PID field must be initialized for the new process.  */
          curp->pid = self->pid;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;

          if (curp->specific_used)
            {
              /* Clear the thread-specific data.  */
              memset (curp->specific_1stblock, '\0',
                      sizeof (curp->specific_1stblock));

              curp->specific_used = false;

              for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
                if (curp->specific[cnt] != NULL)
                  {
                    memset (curp->specific[cnt], '\0',
                            sizeof (curp->specific_1stblock));

                    /* We have allocated the block which we do not
                       free here so re-set the bit.  */
                    curp->specific_used = true;
                  }
            }
        }
    }

  /* Reset the PIDs in any cached stacks.  */
  list_for_each (runp, &stack_cache)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      curp->pid = self->pid;
    }

  /* Add the stack of all running threads to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the cache list
     and add it to the list of running threads.  Which of the two
     lists is decided by the user_stack flag.  */
  stack_list_del (&self->list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
    list_add (&self->list, &__stack_user);
  else
    list_add (&self->list, &stack_used);

  /* There is one thread running.  */
  __nptl_nthreads = 1;

  in_flight_stack = 0;

  /* Initialize locks.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
}

#if HP_TIMING_AVAIL
# undef __find_thread_by_id
/* Find a thread given the thread ID.  */
attribute_hidden
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

 out:
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return result;
}
#endif

#ifdef SIGSETXID
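/* Overview of the setxid_futex values used below: -1 means the thread
   has not finished being cloned yet, so set*id requests must wait; -2
   means a set*id request is waiting for the clone to finish; 0 means the
   thread is marked and must not exit before the SIGSETXID handler has
   run; setting it to 1 and waking the futex releases any waiter.  The
   transition away from -1/-2 is performed by the thread-creation path
   once the new thread exists.  */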
static void
internal_function
setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  /* Wait until this thread is cloned.  */
  if (t->setxid_futex == -1
      && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
    do
      futex_wait_simple (&t->setxid_futex, -2, FUTEX_PRIVATE);
    while (t->setxid_futex == -2);

  /* Don't let the thread exit before the setxid handler runs.  */
  t->setxid_futex = 0;

  do
    {
      ch = t->cancelhandling;

      /* If the thread is exiting right now, ignore it.  */
      if ((ch & EXITING_BITMASK) != 0)
        {
          /* Release the futex if there is no other setxid in
             progress.  */
          if ((ch & SETXID_BITMASK) == 0)
            {
              t->setxid_futex = 1;
              futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
            }
          return;
        }
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch | SETXID_BITMASK, ch));
}


static void
internal_function
setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  do
    {
      ch = t->cancelhandling;
      if ((ch & SETXID_BITMASK) == 0)
        return;
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch & ~SETXID_BITMASK, ch));

  /* Release the futex just in case.  */
  t->setxid_futex = 1;
  futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
}


static int
internal_function
setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
{
  if ((t->cancelhandling & SETXID_BITMASK) == 0)
    return 0;

  int val;
  INTERNAL_SYSCALL_DECL (err);
  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
                          t->tid, SIGSETXID);

  /* If this failed, the thread must not have started yet or else it has
     already exited.  */
  if (!INTERNAL_SYSCALL_ERROR_P (val, err))
    {
      atomic_increment (&cmdp->cntr);
      return 1;
    }
  else
    return 0;
}

/* Check for consistency across set*id system call results.  The abort
   should not happen as long as all privilege changes happen through
   the glibc wrappers.  ERROR must be 0 (no error) or an errno
   code.  */
void
attribute_hidden
__nptl_setxid_error (struct xid_command *cmdp, int error)
{
  do
    {
      int olderror = cmdp->error;
      if (olderror == error)
        break;
      if (olderror != -1)
        /* Mismatch between current and previous results.  */
        abort ();
    }
  while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1));
}

int
attribute_hidden
__nptl_setxid (struct xid_command *cmdp)
{
  int signalled;
  int result;
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  __xidcmd = cmdp;
  cmdp->cntr = 0;
  cmdp->error = -1;

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Iterate until we don't succeed in signalling anyone.  That means
     we have gotten all running threads, and their children will be
     automatically correct once started.  */
  do
    {
      signalled = 0;

      list_for_each (runp, &stack_used)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      list_for_each (runp, &__stack_user)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      int cur = cmdp->cntr;
      while (cur != 0)
        {
          futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
                             FUTEX_PRIVATE);
          cur = cmdp->cntr;
        }
    }
  while (signalled != 0);

  /* Clean up flags, so that no thread blocks during exit waiting
     for a signal which will never come.  */
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  /* This must be last, otherwise the current thread might not have
     permissions to send the SIGSETXID signal to the other threads.  */
  INTERNAL_SYSCALL_DECL (err);
  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
                                 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
  int error = 0;
  if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
    {
      error = INTERNAL_SYSCALL_ERRNO (result, err);
      __set_errno (error);
      result = -1;
    }
  __nptl_setxid_error (cmdp, error);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
  return result;
}
#endif  /* SIGSETXID.  */

static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* We cannot delay the initialization of the Static TLS area, since
     it can be accessed with LE or IE, but since the DTV is only used
     by GD and LD, we can delay its update to avoid a race.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

void
attribute_hidden
__pthread_init_static_tls (struct link_map *map)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}

void
attribute_hidden
__wait_lookup_done (void)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}