[glibc.git] / nptl / allocatestack.c
/* Copyright (C) 2002-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <dl-sysdep.h>
#include <dl-tls.h>
#include <tls.h>
#include <list.h>
#include <lowlevellock.h>
#include <kernel-features.h>
#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr = NULL

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare the function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

/* We need two stacks.  The kernel will place them but we have to tell
   the kernel about the size of the reserved address space.  */
# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr, stacksize

/* How to declare the function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif
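
/* For context: these macros are consumed by the thread creation code
   in pthread_create.c, roughly along these lines (a simplified,
   illustrative sketch, not part of this file):

     STACK_VARIABLES;
     struct pthread *pd;

     int err = ALLOCATE_STACK (iattr, &pd);
     if (err != 0)
       return err;
     ...
     err = create_thread (pd, iattr, STACK_VARIABLES_ARGS);
*/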
/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif

/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
   a stack.  Use it when possible.  */
#ifndef MAP_STACK
# define MAP_STACK 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif
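
/* In other words: with TLS_TCB_AT_TP the thread descriptor itself is
   the TCB, so it already is the thread pointer.  With TLS_DTV_AT_TP
   the TCB sits TLS_PRE_TCB_SIZE bytes above the descriptor, so the
   address has to be adjusted before it is handed to the TLS support
   code (GET_DTV, _dl_allocate_tls, ...).  */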
/* Cache handling for not-yet free stacks.  */

/* Maximum size in bytes of the stack cache.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
static size_t stack_cache_actsize;

/* Mutex protecting the stack cache and the lists below.  */
static int stack_cache_lock = LLL_LOCK_INITIALIZER;

/* List of queued stack frames.  */
static LIST_HEAD (stack_cache);

/* List of the stacks in use.  */
static LIST_HEAD (stack_used);

/* We need to record what list operations we are going to do so that,
   in case of an asynchronous interruption due to a fork() call, we
   can correct for the work.  */
static uintptr_t in_flight_stack;

/* List of the threads with user provided stacks in use.  No need to
   initialize this, since it's done in __pthread_initialize_minimal.  */
list_t __stack_user __attribute__ ((nocommon));
hidden_data_def (__stack_user)

#if COLORING_INCREMENT != 0
/* Number of threads created.  */
static unsigned int nptl_ncreated;
#endif
/* Check whether the stack is still used or not.  */
#define FREE_P(descr) ((descr)->tid <= 0)


static void
stack_list_del (list_t *elem)
{
  in_flight_stack = (uintptr_t) elem;

  atomic_write_barrier ();

  list_del (elem);

  atomic_write_barrier ();

  in_flight_stack = 0;
}


static void
stack_list_add (list_t *elem, list_t *list)
{
  in_flight_stack = (uintptr_t) elem | 1;

  atomic_write_barrier ();

  list_add (elem, list);

  atomic_write_barrier ();

  in_flight_stack = 0;
}
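
/* The low bit of in_flight_stack records which kind of operation was
   in progress: set for an add (stack_list_add), clear for a delete
   (stack_list_del).  __reclaim_stacks below uses this after a fork()
   to detect and repair a half-completed list operation in the child.  */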
/* We create a doubly linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */


/* Get a stack frame from the cache.  We have to match by size since
   some blocks might be too small or far too large.  */
static struct pthread *
get_cached_stack (size_t *sizep, void **memp)
{
  size_t size = *sizep;
  struct pthread *result = NULL;
  list_t *entry;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Search the cache for a matching entry.  We search for the
     smallest stack which has at least the required size.  Note that
     in normal situations the size of all allocated stacks is the
     same.  At the very least there are only a few different sizes.
     Therefore this loop will exit early most of the time with an
     exact match.  */
  list_for_each (entry, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr) && curr->stackblock_size >= size)
        {
          if (curr->stackblock_size == size)
            {
              result = curr;
              break;
            }

          if (result == NULL
              || result->stackblock_size > curr->stackblock_size)
            result = curr;
        }
    }

  if (__builtin_expect (result == NULL, 0)
      /* Make sure the size difference is not excessive.  In that
         case we do not use the block.  */
      || __builtin_expect (result->stackblock_size > 4 * size, 0))
    {
      /* Release the lock.  */
      lll_unlock (stack_cache_lock, LLL_PRIVATE);

      return NULL;
    }

  /* Don't allow setxid until cloned.  */
  result->setxid_futex = -1;

  /* Dequeue the entry.  */
  stack_list_del (&result->list);

  /* And add to the list of stacks in use.  */
  stack_list_add (&result->list, &stack_used);

  /* And decrease the cache size.  */
  stack_cache_actsize -= result->stackblock_size;

  /* Release the lock early.  */
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  /* Report size and location of the stack to the caller.  */
  *sizep = result->stackblock_size;
  *memp = result->stackblock;

  /* Cancellation handling is back to the default.  */
  result->cancelhandling = 0;
  result->cleanup = NULL;

  /* No pending event.  */
  result->nextevent = NULL;

  /* Clear the DTV.  */
  dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    if (! dtv[1 + cnt].pointer.is_static
        && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
      free (dtv[1 + cnt].pointer.val);
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));

  /* Re-initialize the TLS.  */
  _dl_allocate_tls_init (TLS_TPADJ (result));

  return result;
}
/* Free stacks until cache size is lower than LIMIT.  */
void
__free_stacks (size_t limit)
{
  /* We reduce the size of the cache.  Remove the last entries until
     the size is below the limit.  */
  list_t *entry;
  list_t *prev;

  /* Search from the end of the list.  */
  list_for_each_prev_safe (entry, prev, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr))
        {
          /* Unlink the block.  */
          stack_list_del (entry);

          /* Account for the freed memory.  */
          stack_cache_actsize -= curr->stackblock_size;

          /* Free the memory associated with the ELF TLS.  */
          _dl_deallocate_tls (TLS_TPADJ (curr), false);

          /* Remove this block.  This should never fail.  If it does
             something is really wrong.  */
          if (munmap (curr->stackblock, curr->stackblock_size) != 0)
            abort ();

          /* Maybe we have freed enough.  */
          if (stack_cache_actsize <= limit)
            break;
        }
    }
}


/* Add a stack frame which is not used anymore to the stack cache.  Must be
   called with the cache lock held.  */
static inline void
__attribute ((always_inline))
queue_stack (struct pthread *stack)
{
  /* We unconditionally add the stack to the list.  The memory may
     still be in use but it will not be reused until the kernel marks
     the stack as not used anymore.  */
  stack_list_add (&stack->list, &stack_cache);

  stack_cache_actsize += stack->stackblock_size;
  if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
    __free_stacks (stack_cache_maxsize);
}
static int
internal_function
change_stack_perm (struct pthread *pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                   , size_t pagemask
#endif
                   )
{
#ifdef NEED_SEPARATE_REGISTER_STACK
  void *stack = (pd->stackblock
                 + (((((pd->stackblock_size - pd->guardsize) / 2)
                      & pagemask) + pd->guardsize) & pagemask));
  size_t len = pd->stackblock + pd->stackblock_size - stack;
#elif _STACK_GROWS_DOWN
  void *stack = pd->stackblock + pd->guardsize;
  size_t len = pd->stackblock_size - pd->guardsize;
#elif _STACK_GROWS_UP
  void *stack = pd->stackblock;
  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
#else
# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
#endif
  if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
    return errno;

  return 0;
}
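
/* change_stack_perm is called from two places in this file: from
   allocate_stack below, when another thread made all stacks executable
   while this new stack was being prepared, and from
   __make_stacks_executable, which adds PROT_EXEC to every in-use and
   cached stack.  Only the usable part of the stack is remapped; the
   guard area keeps its protection.  */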
/* Returns a usable stack for a new thread either by allocating a
   new stack or reusing a cached stack of sufficient size.
   ATTR must be non-NULL and point to a valid pthread_attr.
   PDP must be non-NULL.  */
static int
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                ALLOCATE_STACK_PARMS)
{
  struct pthread *pd;
  size_t size;
  size_t pagesize_m1 = __getpagesize () - 1;
  void *stacktop;

  assert (powerof2 (pagesize_m1 + 1));
  assert (TCB_ALIGNMENT >= STACK_ALIGN);

  /* Get the stack size from the attribute if it is set.  Otherwise we
     use the default we determined at start time.  */
  if (attr->stacksize != 0)
    size = attr->stacksize;
  else
    {
      lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
      size = __default_pthread_attr.stacksize;
      lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
    }
  /* Get memory for the stack.  */
  if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
    {
      uintptr_t adj;

      /* If the user also specified the size of the stack make sure it
         is large enough.  */
      if (attr->stacksize != 0
          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
        return EINVAL;

      /* Adjust stack size for alignment of the TLS block.  */
#if TLS_TCB_AT_TP
      adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
            & __static_tls_align_m1;
      assert (size > adj + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
      adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
            & __static_tls_align_m1;
      assert (size > adj);
#endif

      /* The user provided some memory.  Let's hope it matches the
         size...  We do not allocate guard pages if the user provided
         the stack.  It is the user's responsibility to do this if it
         is wanted.  */
#if TLS_TCB_AT_TP
      pd = (struct pthread *) ((uintptr_t) attr->stackaddr
                               - TLS_TCB_SIZE - adj);
#elif TLS_DTV_AT_TP
      pd = (struct pthread *) (((uintptr_t) attr->stackaddr
                                - __static_tls_size - adj)
                               - TLS_PRE_TCB_SIZE);
#endif

      /* The user provided stack memory needs to be cleared.  */
      memset (pd, '\0', sizeof (struct pthread));

      /* The first TSD block is included in the TCB.  */
      pd->specific[0] = pd->specific_1stblock;

      /* Remember the stack-related values.  */
      pd->stackblock = (char *) attr->stackaddr - size;
      pd->stackblock_size = size;

      /* This is a user-provided stack.  It will not be queued in the
         stack cache nor will the memory (except the TLS memory) be freed.  */
      pd->user_stack = true;

      /* This is at least the second thread.  */
      pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
      __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
      /* The thread must know when private futexes are supported.  */
      pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
      /* Copy the sysinfo value from the parent.  */
      THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
#endif

      /* The process ID is also the same as that of the caller.  */
      pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

      /* Don't allow setxid until cloned.  */
      pd->setxid_futex = -1;

      /* Allocate the DTV for this thread.  */
      if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
        {
          /* Something went wrong.  */
          assert (errno == ENOMEM);
          return errno;
        }


      /* Prepare to modify global data.  */
      lll_lock (stack_cache_lock, LLL_PRIVATE);

      /* And add to the list of stacks in use.  */
      list_add (&pd->list, &__stack_user);

      lll_unlock (stack_cache_lock, LLL_PRIVATE);
    }
  else
    {
      /* Allocate some anonymous memory.  If possible use the cache.  */
      size_t guardsize;
      size_t reqsize;
      void *mem;
      const int prot = (PROT_READ | PROT_WRITE
                        | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

#if COLORING_INCREMENT != 0
      /* Add one more page for stack coloring.  Don't do it for stacks
         with 16 times pagesize or larger.  This might just cause
         unnecessary misalignment.  */
      if (size <= 16 * pagesize_m1)
        size += pagesize_m1 + 1;
#endif

      /* Adjust the stack size for alignment.  */
      size &= ~__static_tls_align_m1;
      assert (size != 0);

      /* Make sure the size of the stack is enough for the guard and
         possibly the thread descriptor.  */
      guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
      if (__builtin_expect (size < ((guardsize + __static_tls_size
                                     + MINIMAL_REST_STACK + pagesize_m1)
                                    & ~pagesize_m1),
                            0))
        /* The stack is too small (or the guard too large).  */
        return EINVAL;

      /* Try to get a stack from the cache.  */
      reqsize = size;
      pd = get_cached_stack (&size, &mem);
      if (pd == NULL)
        {
          /* To avoid aliasing effects on a larger scale than pages we
             adjust the allocated stack size if necessary.  This way
             allocations directly following each other will not have
             aliasing problems.  */
#if MULTI_PAGE_ALIASING != 0
          if ((size % MULTI_PAGE_ALIASING) == 0)
            size += pagesize_m1 + 1;
#endif

          mem = mmap (NULL, size, prot,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);

          if (__builtin_expect (mem == MAP_FAILED, 0))
            return errno;

          /* SIZE is guaranteed to be greater than zero.
             So we can never get a null pointer back from mmap.  */
          assert (mem != NULL);

#if COLORING_INCREMENT != 0
          /* Atomically increment NCREATED.  */
          unsigned int ncreated = atomic_increment_val (&nptl_ncreated);

          /* We choose the offset for coloring by incrementing it for
             every new thread by a fixed amount.  The offset is used
             modulo the page size.  Even if coloring would be better
             relative to higher alignment values it makes no sense to
             do it since the mmap() interface does not allow us to
             specify any alignment for the returned memory block.  */
          size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;

          /* Make sure the coloring offset does not disturb the alignment
             of the TCB and static TLS block.  */
          if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
            coloring = (((coloring + __static_tls_align_m1)
                         & ~(__static_tls_align_m1))
                        & ~pagesize_m1);
#else
          /* Unless specified we do not make any adjustments.  */
# define coloring 0
#endif

          /* Place the thread descriptor at the end of the stack.  */
#if TLS_TCB_AT_TP
          pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
#elif TLS_DTV_AT_TP
          pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
                                     - __static_tls_size)
                                    & ~__static_tls_align_m1)
                                   - TLS_PRE_TCB_SIZE);
#endif

          /* Remember the stack-related values.  */
          pd->stackblock = mem;
          pd->stackblock_size = size;

          /* We allocated the first block of the thread-specific data
             array.  This address will not change for the lifetime of
             this descriptor.  */
          pd->specific[0] = pd->specific_1stblock;

          /* This is at least the second thread.  */
          pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
          __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
          /* The thread must know when private futexes are supported.  */
          pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                    header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
          /* Copy the sysinfo value from the parent.  */
          THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
#endif

          /* Don't allow setxid until cloned.  */
          pd->setxid_futex = -1;

          /* The process ID is also the same as that of the caller.  */
          pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

          /* Allocate the DTV for this thread.  */
          if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
            {
              /* Something went wrong.  */
              assert (errno == ENOMEM);

              /* Free the stack memory we just allocated.  */
              (void) munmap (mem, size);

              return errno;
            }


          /* Prepare to modify global data.  */
          lll_lock (stack_cache_lock, LLL_PRIVATE);

          /* And add to the list of stacks in use.  */
          stack_list_add (&pd->list, &stack_used);

          lll_unlock (stack_cache_lock, LLL_PRIVATE);


          /* There might have been a race.  Another thread might have
             caused the stacks to get exec permission while this new
             stack was prepared.  Detect if this was possible and
             change the permission if necessary.  */
          if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
                                && (prot & PROT_EXEC) == 0, 0))
            {
              int err = change_stack_perm (pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                                           , ~pagesize_m1
#endif
                                           );
              if (err != 0)
                {
                  /* Free the stack memory we just allocated.  */
                  (void) munmap (mem, size);

                  return err;
                }
            }


          /* Note that all of the stack and the thread descriptor is
             zeroed.  This means we do not have to initialize fields
             with initial value zero.  This is specifically true for
             the 'tid' field which is always set back to zero once the
             stack is not used anymore and for the 'guardsize' field
             which will be read next.  */
        }
      /* Create or resize the guard area if necessary.  */
      if (__builtin_expect (guardsize > pd->guardsize, 0))
        {
#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
#elif _STACK_GROWS_DOWN
          char *guard = mem;
#elif _STACK_GROWS_UP
          char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
#endif
          if (mprotect (guard, guardsize, PROT_NONE) != 0)
            {
            mprot_error:
              lll_lock (stack_cache_lock, LLL_PRIVATE);

              /* Remove the thread from the list.  */
              stack_list_del (&pd->list);

              lll_unlock (stack_cache_lock, LLL_PRIVATE);

              /* Get rid of the TLS block we allocated.  */
              _dl_deallocate_tls (TLS_TPADJ (pd), false);

              /* Free the stack memory regardless of whether the size
                 of the cache is over the limit or not.  If this piece
                 of memory caused problems we better do not use it
                 anymore.  Uh, and we ignore possible errors.  There
                 is nothing we could do.  */
              (void) munmap (mem, size);

              return errno;
            }

          pd->guardsize = guardsize;
        }
      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
                                 0))
        {
          /* The old guard area is too large.  */

#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
          char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);

          if (oldguard < guard
              && mprotect (oldguard, guard - oldguard, prot) != 0)
            goto mprot_error;

          if (mprotect (guard + guardsize,
                        oldguard + pd->guardsize - guard - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_DOWN
          if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_UP
          if (mprotect ((char *) pd - pd->guardsize,
                        pd->guardsize - guardsize, prot) != 0)
            goto mprot_error;
#endif

          pd->guardsize = guardsize;
        }
      /* The pthread_getattr_np() calls need to get passed the size
         requested in the attribute, regardless of how large the
         actually used guardsize is.  */
      pd->reported_guardsize = guardsize;
    }
  /* Initialize the lock.  We have to do this unconditionally since the
     stillborn thread could be canceled while the lock is taken.  */
  pd->lock = LLL_LOCK_INITIALIZER;

  /* The robust mutex lists also need to be initialized
     unconditionally because the cleanup for the previous stack owner
     might have happened in the kernel.  */
  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
                                  - offsetof (pthread_mutex_t,
                                              __data.__list.__next));
  pd->robust_head.list_op_pending = NULL;
#ifdef __PTHREAD_MUTEX_HAVE_PREV
  pd->robust_prev = &pd->robust_head;
#endif
  pd->robust_head.list = &pd->robust_head;

  /* We place the thread descriptor at the end of the stack.  */
  *pdp = pd;

#if TLS_TCB_AT_TP
  /* The stack begins before the TCB and the static TLS block.  */
  stacktop = ((char *) (pd + 1) - __static_tls_size);
#elif TLS_DTV_AT_TP
  stacktop = (char *) (pd - 1);
#endif

#ifdef NEED_SEPARATE_REGISTER_STACK
  *stack = pd->stackblock;
  *stacksize = stacktop - *stack;
#elif _STACK_GROWS_DOWN
  *stack = stacktop;
#elif _STACK_GROWS_UP
  *stack = pd->stackblock;
  assert (*stack > 0);
#endif

  return 0;
}
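
/* For the common TLS_TCB_AT_TP, downward-growing configuration the
   mapping set up above roughly looks like this (addresses increase to
   the right):

     mem                                                  mem + size
      | guard (PROT_NONE) | usable stack ... | static TLS | struct pthread (TCB) |

   The reported stack top *stack is the lower end of the region that is
   reserved at the top of the mapping for the static TLS blocks and the
   thread descriptor; the TLS_DTV_AT_TP and upward-growing cases follow
   the corresponding branches above.  */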
void
internal_function
__deallocate_stack (struct pthread *pd)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Remove the thread from the list of threads with user defined
     stacks.  */
  stack_list_del (&pd->list);

  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
     not reset the 'used' flag in the 'tid' field.  This is done by
     the kernel.  If no thread has been created yet this field is
     still zero.  */
  if (__builtin_expect (! pd->user_stack, 1))
    (void) queue_stack (pd);
  else
    /* Free the memory associated with the ELF TLS.  */
    _dl_deallocate_tls (TLS_TPADJ (pd), false);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}
int
internal_function
__make_stacks_executable (void **stack_endp)
{
  /* First the main thread's stack.  */
  int err = _dl_make_stack_executable (stack_endp);
  if (err != 0)
    return err;

#ifdef NEED_SEPARATE_REGISTER_STACK
  const size_t pagemask = ~(__getpagesize () - 1);
#endif

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                               , pagemask
#endif
                               );
      if (err != 0)
        break;
    }

  /* Also change the permission for the currently unused stacks.  This
     might be wasted time but better spend it here than adding a check
     in the fast path.  */
  if (err == 0)
    list_for_each (runp, &stack_cache)
      {
        err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                                 , pagemask
#endif
                                 );
        if (err != 0)
          break;
      }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return err;
}
/* In case of a fork() call the memory allocation in the child will be
   the same but only one thread is running.  All stacks except that of
   the one running thread are not used anymore.  We have to recycle
   them.  */
void
__reclaim_stacks (void)
{
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking necessary.  The caller is the only stack in use.  But
     we have to be aware that we might have interrupted a list
     operation.  */

  if (in_flight_stack != 0)
    {
      bool add_p = in_flight_stack & 1;
      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);

      if (add_p)
        {
          /* We always add at the beginning of the list.  So in this
             case we only need to check the beginning of these lists.  */
          int check_list (list_t *l)
          {
            if (l->next->prev != l)
              {
                assert (l->next->prev == elem);

                elem->next = l->next;
                elem->prev = l;
                l->next = elem;

                return 1;
              }

            return 0;
          }

          if (check_list (&stack_used) == 0)
            (void) check_list (&stack_cache);
        }
      else
        {
          /* We can simply always replay the delete operation.  */
          elem->next->prev = elem->prev;
          elem->prev->next = elem->next;
        }
    }

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* The PID field must be initialized for the new process.  */
          curp->pid = self->pid;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;

          if (curp->specific_used)
            {
              /* Clear the thread-specific data.  */
              memset (curp->specific_1stblock, '\0',
                      sizeof (curp->specific_1stblock));

              curp->specific_used = false;

              for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
                if (curp->specific[cnt] != NULL)
                  {
                    memset (curp->specific[cnt], '\0',
                            sizeof (curp->specific_1stblock));

                    /* We have allocated the block which we do not
                       free here so re-set the bit.  */
                    curp->specific_used = true;
                  }
            }
        }
    }

  /* Reset the PIDs in any cached stacks.  */
  list_for_each (runp, &stack_cache)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      curp->pid = self->pid;
    }

  /* Add the stack of all running threads to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the cache list
     and add it to the list of running threads.  Which of the two
     lists is decided by the user_stack flag.  */
  stack_list_del (&self->list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
    list_add (&self->list, &__stack_user);
  else
    list_add (&self->list, &stack_used);

  /* There is one thread running.  */
  __nptl_nthreads = 1;

  in_flight_stack = 0;

  /* Initialize locks.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
}
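
/* Note that the loop above only adds the reclaimed stacks to the
   cache accounting; it never trims the cache, so right after a fork()
   stack_cache_actsize may temporarily exceed stack_cache_maxsize until
   the next queue_stack call runs __free_stacks.  */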
#if HP_TIMING_AVAIL
# undef __find_thread_by_id
/* Find a thread given the thread ID.  */
attribute_hidden
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

 out:
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return result;
}
#endif
static void
internal_function
setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  /* Wait until this thread is cloned.  */
  if (t->setxid_futex == -1
      && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
    do
      lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE);
    while (t->setxid_futex == -2);

  /* Don't let the thread exit before the setxid handler runs.  */
  t->setxid_futex = 0;

  do
    {
      ch = t->cancelhandling;

      /* If the thread is exiting right now, ignore it.  */
      if ((ch & EXITING_BITMASK) != 0)
        {
          /* Release the futex if there is no other setxid in
             progress.  */
          if ((ch & SETXID_BITMASK) == 0)
            {
              t->setxid_futex = 1;
              lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
            }
          return;
        }
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch | SETXID_BITMASK, ch));
}
static void
internal_function
setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  do
    {
      ch = t->cancelhandling;
      if ((ch & SETXID_BITMASK) == 0)
        return;
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch & ~SETXID_BITMASK, ch));

  /* Release the futex just in case.  */
  t->setxid_futex = 1;
  lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
}
static int
internal_function
setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
{
  if ((t->cancelhandling & SETXID_BITMASK) == 0)
    return 0;

  int val;
  INTERNAL_SYSCALL_DECL (err);
  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
                          t->tid, SIGSETXID);

  /* If this failed, the thread must not have started yet or must have
     already exited.  */
  if (!INTERNAL_SYSCALL_ERROR_P (val, err))
    {
      atomic_increment (&cmdp->cntr);
      return 1;
    }
  else
    return 0;
}
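
/* The three helpers above implement the per-thread side of the setxid
   protocol driven by __nptl_setxid below: each live thread is first
   marked with SETXID_BITMASK, then signalled with SIGSETXID until no
   thread accepts the signal anymore, and finally unmarked so that no
   thread blocks waiting for a signal that will never arrive.  The
   calling thread performs the actual id-changing syscall only after
   all other threads have been handled.  */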
int
attribute_hidden
__nptl_setxid (struct xid_command *cmdp)
{
  int signalled;
  int result;
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  __xidcmd = cmdp;
  cmdp->cntr = 0;

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Iterate until we don't succeed in signalling anyone.  That means
     we have gotten all running threads, and their children will be
     automatically correct once started.  */
  do
    {
      signalled = 0;

      list_for_each (runp, &stack_used)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      list_for_each (runp, &__stack_user)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      int cur = cmdp->cntr;
      while (cur != 0)
        {
          lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
          cur = cmdp->cntr;
        }
    }
  while (signalled != 0);

  /* Clean up flags, so that no thread blocks during exit waiting
     for a signal which will never come.  */
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  /* This must be last, otherwise the current thread might not have
     permission to send the SIGSETXID signal to the other threads.  */
  INTERNAL_SYSCALL_DECL (err);
  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
                                 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
  if (INTERNAL_SYSCALL_ERROR_P (result, err))
    {
      __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
      result = -1;
    }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
  return result;
}
static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
  dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* Fill in the DTV slot so that a later LD/GD access will find it.  */
  dtv[map->l_tls_modid].pointer.val = dest;
  dtv[map->l_tls_modid].pointer.is_static = true;

  /* Initialize the memory.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}
void
attribute_hidden
__pthread_init_static_tls (struct link_map *map)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}
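
/* __wait_lookup_done below synchronizes with the gscope_flag that each
   thread publishes in its descriptor: a thread inside a global-scope
   lookup has the flag set to THREAD_GSCOPE_FLAG_USED; the waiter
   switches it to THREAD_GSCOPE_FLAG_WAIT with a compare-and-exchange
   and then sleeps on the flag's futex until the owning thread leaves
   the global scope and resets it.  */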
void
attribute_hidden
__wait_lookup_done (void)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}