1 /* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 #include <sys/param.h>
27 #include <dl-sysdep.h>
32 #ifndef NEED_SEPARATE_REGISTER_STACK
34 /* Most architectures have exactly one stack pointer. Some have more. */
35 # define STACK_VARIABLES void *stackaddr
37 /* How to pass the values to the 'create_thread' function. */
38 # define STACK_VARIABLES_ARGS stackaddr
40 /* How to declare function which gets these parameters. */
41 # define STACK_VARIABLES_PARMS void *stackaddr
43 /* How to declare allocate_stack. */
44 # define ALLOCATE_STACK_PARMS void **stack
46 /* This is how the function is called. We do it this way to allow
47 other variants of the function to have more parameters. */
48 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
52 /* We need two stacks. The kernel will place them but we have to tell
53 the kernel about the size of the reserved address space. */
54 # define STACK_VARIABLES void *stackaddr; size_t stacksize
56 /* How to pass the values to the 'create_thread' function. */
57 # define STACK_VARIABLES_ARGS stackaddr, stacksize
59 /* How to declare function which gets these parameters. */
60 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
62 /* How to declare allocate_stack. */
63 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
65 /* This is how the function is called. We do it this way to allow
66 other variants of the function to have more parameters. */
67 # define ALLOCATE_STACK(attr, pd) \
68 allocate_stack (attr, pd, &stackaddr, &stacksize)
73 /* Default alignment of stack. */
75 # define STACK_ALIGN __alignof__ (long double)
78 /* Default value for minimal stack size after allocating thread
79 descriptor and guard. */
80 #ifndef MINIMAL_REST_STACK
81 # define MINIMAL_REST_STACK 4096
85 /* Let the architecture add some flags to the mmap() call used to
87 #ifndef ARCH_MAP_FLAGS
88 # define ARCH_MAP_FLAGS 0
91 /* This yields the pointer that TLS support code calls the thread pointer. */
93 # define TLS_TPADJ(pd) (pd)
95 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
98 /* Cache handling for not-yet free stacks. */
100 /* Maximum size of the cache in bytes. */
101 static size_t stack_cache_maxsize
= 40 * 1024 * 1024; /* 40 MiB by default. */
/* Number of bytes currently accounted to the cache; it is adjusted by
   the stackblock_size of each entry added to or removed from it. */
102 static size_t stack_cache_actsize
;
104 /* Mutex protecting this variable. */
105 static lll_lock_t stack_cache_lock
= LLL_LOCK_INITIALIZER
;
107 /* List of queued stack frames. */
108 static LIST_HEAD (stack_cache
);
110 /* List of the stacks in use. */
111 static LIST_HEAD (stack_used
);
113 /* List of the threads with user provided stacks in use. No need to
114 initialize this, since it's done in __pthread_initialize_minimal. */
115 list_t __stack_user
__attribute__ ((nocommon
));
116 hidden_data_def (__stack_user
)
118 #if COLORING_INCREMENT != 0
119 /* Number of threads created. */
120 static unsigned int nptl_ncreated
;
124 /* Check whether the stack is still used or not. A descriptor whose
   'tid' field is zero or negative is treated as free. */
125 #define FREE_P(descr) ((descr)->tid <= 0)
128 /* We create a doubly linked list of all cache entries. Doubly linked
129 because this allows removing entries from the end. */
132 /* Get a stack frame from the cache. We have to match by size since
133 some blocks might be too small or far too large. */
134 static struct pthread
*
135 get_cached_stack (size_t *sizep
, void **memp
)
137 size_t size
= *sizep
;
138 struct pthread
*result
= NULL
;
141 lll_lock (stack_cache_lock
);
143 /* Search the cache for a matching entry. We search for the
144 smallest stack which has at least the required size. Note that
145 in normal situations the size of all allocated stacks is the
146 same. At the very least there are only a few different sizes.
147 Therefore this loop will exit early most of the time with an
149 list_for_each (entry
, &stack_cache
)
151 struct pthread
*curr
;
153 curr
= list_entry (entry
, struct pthread
, list
);
154 if (FREE_P (curr
) && curr
->stackblock_size
>= size
)
156 if (curr
->stackblock_size
== size
)
163 || result
->stackblock_size
> curr
->stackblock_size
)
168 if (__builtin_expect (result
== NULL
, 0)
169 /* Make sure the size difference is not too excessive. In that
170 case we do not use the block. */
171 || __builtin_expect (result
->stackblock_size
> 4 * size
, 0))
173 /* Release the lock. */
174 lll_unlock (stack_cache_lock
);
179 /* Dequeue the entry. */
180 list_del (&result
->list
);
182 /* And add to the list of stacks in use. */
183 list_add (&result
->list
, &stack_used
);
185 /* And decrease the cache size. */
186 stack_cache_actsize
-= result
->stackblock_size
;
188 /* Release the lock early. */
189 lll_unlock (stack_cache_lock
);
191 /* Report size and location of the stack to the caller. */
192 *sizep
= result
->stackblock_size
;
193 *memp
= result
->stackblock
;
195 /* Cancellation handling is back to the default. */
196 result
->cancelhandling
= 0;
197 result
->cleanup
= NULL
;
199 /* No pending event. */
200 result
->nextevent
= NULL
;
203 dtv_t
*dtv
= GET_DTV (TLS_TPADJ (result
));
204 memset (dtv
, '\0', (dtv
[-1].counter
+ 1) * sizeof (dtv_t
));
206 /* Re-initialize the TLS. */
207 _dl_allocate_tls_init (TLS_TPADJ (result
));
213 /* Add a stack frame which is not used anymore to the stack cache. Must be
214 called with the cache lock held. */
216 __attribute ((always_inline
))
217 queue_stack (struct pthread
*stack
)
219 /* We unconditionally add the stack to the list. The memory may
220 still be in use but it will not be reused until the kernel marks
221 the stack as not used anymore. */
222 list_add (&stack
->list
, &stack_cache
);
224 stack_cache_actsize
+= stack
->stackblock_size
;
225 if (__builtin_expect (stack_cache_actsize
> stack_cache_maxsize
, 0))
227 /* We reduce the size of the cache. Remove the last entries
228 until the size is below the limit. */
232 /* Search from the end of the list. */
233 list_for_each_prev_safe (entry
, prev
, &stack_cache
)
235 struct pthread
*curr
;
237 curr
= list_entry (entry
, struct pthread
, list
);
240 /* Unlink the block. */
243 /* Account for the freed memory. */
244 stack_cache_actsize
-= curr
->stackblock_size
;
246 /* Free the memory associated with the ELF TLS. */
247 _dl_deallocate_tls (TLS_TPADJ (curr
), false);
249 /* Remove this block. This should never fail. If it
250 does something is really wrong. */
251 if (munmap (curr
->stackblock
, curr
->stackblock_size
) != 0)
254 /* Maybe we have freed enough. */
255 if (stack_cache_actsize
<= stack_cache_maxsize
)
265 change_stack_perm (struct pthread
*pd
266 #ifdef NEED_SEPARATE_REGISTER_STACK
271 #ifdef NEED_SEPARATE_REGISTER_STACK
272 void *stack
= (pd
->stackblock
273 + (((((pd
->stackblock_size
- pd
->guardsize
) / 2)
274 & pagemask
) + pd
->guardsize
) & pagemask
));
275 size_t len
= pd
->stackblock
+ pd
->stackblock_size
- stack
;
277 void *stack
= pd
->stackblock
+ pd
->guardsize
;
278 size_t len
= pd
->stackblock_size
- pd
->guardsize
;
280 if (mprotect (stack
, len
, PROT_READ
| PROT_WRITE
| PROT_EXEC
) != 0)
288 allocate_stack (const struct pthread_attr
*attr
, struct pthread
**pdp
,
289 ALLOCATE_STACK_PARMS
)
293 size_t pagesize_m1
= __getpagesize () - 1;
296 assert (attr
!= NULL
);
297 assert (powerof2 (pagesize_m1
+ 1));
298 assert (TCB_ALIGNMENT
>= STACK_ALIGN
);
300 /* Get the stack size from the attribute if it is set. Otherwise we
301 use the default we determined at start time. */
302 size
= attr
->stacksize
?: __default_stacksize
;
304 /* Get memory for the stack. */
305 if (__builtin_expect (attr
->flags
& ATTR_FLAG_STACKADDR
, 0))
309 /* If the user also specified the size of the stack make sure it
311 if (attr
->stacksize
!= 0
312 && attr
->stacksize
< (__static_tls_size
+ MINIMAL_REST_STACK
))
315 /* Adjust stack size for alignment of the TLS block. */
317 adj
= ((uintptr_t) attr
->stackaddr
- TLS_TCB_SIZE
)
318 & __static_tls_align_m1
;
319 assert (size
> adj
+ TLS_TCB_SIZE
);
321 adj
= ((uintptr_t) attr
->stackaddr
- __static_tls_size
)
322 & __static_tls_align_m1
;
326 /* The user provided some memory. Let's hope it matches the
327 size... We do not allocate guard pages if the user provided
328 the stack. It is the user's responsibility to do this if it
331 pd
= (struct pthread
*) ((uintptr_t) attr
->stackaddr
332 - TLS_TCB_SIZE
- adj
);
334 pd
= (struct pthread
*) (((uintptr_t) attr
->stackaddr
335 - __static_tls_size
- adj
)
339 /* The user provided stack memory needs to be cleared. */
340 memset (pd
, '\0', sizeof (struct pthread
));
342 /* The first TSD block is included in the TCB. */
343 pd
->specific
[0] = pd
->specific_1stblock
;
345 /* Remember the stack-related values. */
346 pd
->stackblock
= (char *) attr
->stackaddr
- size
;
347 pd
->stackblock_size
= size
;
349 /* This is a user-provided stack. It will not be queued in the
350 stack cache nor will the memory (except the TLS memory) be freed. */
351 pd
->user_stack
= true;
353 /* This is at least the second thread. */
354 pd
->header
.multiple_threads
= 1;
355 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
356 __pthread_multiple_threads
= *__libc_multiple_threads_ptr
= 1;
359 #ifdef NEED_DL_SYSINFO
360 /* Copy the sysinfo value from the parent. */
361 THREAD_SYSINFO(pd
) = THREAD_SELF_SYSINFO
;
364 /* The process ID is also the same as that of the caller. */
365 pd
->pid
= THREAD_GETMEM (THREAD_SELF
, pid
);
367 /* Allocate the DTV for this thread. */
368 if (_dl_allocate_tls (TLS_TPADJ (pd
)) == NULL
)
369 /* Something went wrong. */
373 /* Prepare to modify global data. */
374 lll_lock (stack_cache_lock
);
376 /* And add to the list of stacks in use. */
377 list_add (&pd
->list
, &__stack_user
);
379 lll_unlock (stack_cache_lock
);
383 /* Allocate some anonymous memory. If possible use the cache. */
387 const int prot
= (PROT_READ
| PROT_WRITE
388 | ((GL(dl_stack_flags
) & PF_X
) ? PROT_EXEC
: 0));
390 #if COLORING_INCREMENT != 0
391 /* Add one more page for stack coloring. Don't do it for stacks
392 with 16 times pagesize or larger. This might just cause
393 unnecessary misalignment. */
394 if (size
<= 16 * pagesize_m1
)
395 size
+= pagesize_m1
+ 1;
398 /* Adjust the stack size for alignment. */
399 size
&= ~__static_tls_align_m1
;
402 /* Make sure the size of the stack is enough for the guard and
403 eventually the thread descriptor. */
404 guardsize
= (attr
->guardsize
+ pagesize_m1
) & ~pagesize_m1
;
405 if (__builtin_expect (size
< (guardsize
+ __static_tls_size
406 + MINIMAL_REST_STACK
+ pagesize_m1
+ 1),
408 /* The stack is too small (or the guard too large). */
411 /* Try to get a stack from the cache. */
413 pd
= get_cached_stack (&size
, &mem
);
416 /* To avoid aliasing effects on a larger scale than pages we
417 adjust the allocated stack size if necessary. This way
418 allocations directly following each other will not have
419 aliasing problems. */
420 #if MULTI_PAGE_ALIASING != 0
421 if ((size
% MULTI_PAGE_ALIASING
) == 0)
422 size
+= pagesize_m1
+ 1;
425 mem
= mmap (NULL
, size
, prot
,
426 MAP_PRIVATE
| MAP_ANONYMOUS
| ARCH_MAP_FLAGS
, -1, 0);
428 if (__builtin_expect (mem
== MAP_FAILED
, 0))
430 #ifdef ARCH_RETRY_MMAP
431 mem
= ARCH_RETRY_MMAP (size
);
432 if (__builtin_expect (mem
== MAP_FAILED
, 0))
437 /* SIZE is guaranteed to be greater than zero.
438 So we can never get a null pointer back from mmap. */
439 assert (mem
!= NULL
);
441 #if COLORING_INCREMENT != 0
442 /* Atomically increment NCREATED. */
443 unsigned int ncreated
= atomic_increment_val (&nptl_ncreated
);
445 /* We chose the offset for coloring by incrementing it for
446 every new thread by a fixed amount. The offset is used
447 modulo the page size. Even if coloring would be better
448 relative to higher alignment values it makes no sense to
449 do it since the mmap() interface does not allow us to
450 specify any alignment for the returned memory block. */
451 size_t coloring
= (ncreated
* COLORING_INCREMENT
) & pagesize_m1
;
453 /* Make sure the coloring offset does not disturb the alignment
454 of the TCB and static TLS block. */
455 if (__builtin_expect ((coloring
& __static_tls_align_m1
) != 0, 0))
456 coloring
= (((coloring
+ __static_tls_align_m1
)
457 & ~(__static_tls_align_m1
))
460 /* Unless specified we do not make any adjustments. */
464 /* Place the thread descriptor at the end of the stack. */
466 pd
= (struct pthread
*) ((char *) mem
+ size
- coloring
) - 1;
468 pd
= (struct pthread
*) ((((uintptr_t) mem
+ size
- coloring
470 & ~__static_tls_align_m1
)
474 /* Remember the stack-related values. */
475 pd
->stackblock
= mem
;
476 pd
->stackblock_size
= size
;
478 /* We allocated the first block thread-specific data array.
479 This address will not change for the lifetime of this
481 pd
->specific
[0] = pd
->specific_1stblock
;
483 /* This is at least the second thread. */
484 pd
->header
.multiple_threads
= 1;
485 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
486 __pthread_multiple_threads
= *__libc_multiple_threads_ptr
= 1;
489 #ifdef NEED_DL_SYSINFO
490 /* Copy the sysinfo value from the parent. */
491 THREAD_SYSINFO(pd
) = THREAD_SELF_SYSINFO
;
494 /* The process ID is also the same as that of the caller. */
495 pd
->pid
= THREAD_GETMEM (THREAD_SELF
, pid
);
497 /* Allocate the DTV for this thread. */
498 if (_dl_allocate_tls (TLS_TPADJ (pd
)) == NULL
)
500 /* Something went wrong. */
503 /* Free the stack memory we just allocated. */
504 (void) munmap (mem
, size
);
510 /* Prepare to modify global data. */
511 lll_lock (stack_cache_lock
);
513 /* And add to the list of stacks in use. */
514 list_add (&pd
->list
, &stack_used
);
516 lll_unlock (stack_cache_lock
);
519 /* There might have been a race. Another thread might have
520 caused the stacks to get exec permission while this new
521 stack was prepared. Detect if this was possible and
522 change the permission if necessary. */
523 if (__builtin_expect ((GL(dl_stack_flags
) & PF_X
) != 0
524 && (prot
& PROT_EXEC
) == 0, 0))
526 int err
= change_stack_perm (pd
527 #ifdef NEED_SEPARATE_REGISTER_STACK
533 /* Free the stack memory we just allocated. */
534 (void) munmap (mem
, size
);
541 /* Note that all of the stack and the thread descriptor is
542 zeroed. This means we do not have to initialize fields
543 with initial value zero. This is specifically true for
544 the 'tid' field which is always set back to zero once the
545 stack is not used anymore and for the 'guardsize' field
546 which will be read next. */
549 /* Create or resize the guard area if necessary. */
550 if (__builtin_expect (guardsize
> pd
->guardsize
, 0))
552 #ifdef NEED_SEPARATE_REGISTER_STACK
553 char *guard
= mem
+ (((size
- guardsize
) / 2) & ~pagesize_m1
);
557 if (mprotect (guard
, guardsize
, PROT_NONE
) != 0)
563 lll_lock (stack_cache_lock
);
565 /* Remove the thread from the list. */
566 list_del (&pd
->list
);
568 lll_unlock (stack_cache_lock
);
570 /* Get rid of the TLS block we allocated. */
571 _dl_deallocate_tls (TLS_TPADJ (pd
), false);
573 /* Free the stack memory regardless of whether the size
574 of the cache is over the limit or not. If this piece
575 of memory caused problems we better do not use it
576 anymore. Uh, and we ignore possible errors. There
577 is nothing we could do. */
578 (void) munmap (mem
, size
);
583 pd
->guardsize
= guardsize
;
585 else if (__builtin_expect (pd
->guardsize
- guardsize
> size
- reqsize
,
588 /* The old guard area is too large. */
590 #ifdef NEED_SEPARATE_REGISTER_STACK
591 char *guard
= mem
+ (((size
- guardsize
) / 2) & ~pagesize_m1
);
592 char *oldguard
= mem
+ (((size
- pd
->guardsize
) / 2) & ~pagesize_m1
);
595 && mprotect (oldguard
, guard
- oldguard
, prot
) != 0)
598 if (mprotect (guard
+ guardsize
,
599 oldguard
+ pd
->guardsize
- guard
- guardsize
,
603 if (mprotect ((char *) mem
+ guardsize
, pd
->guardsize
- guardsize
,
608 pd
->guardsize
= guardsize
;
610 /* The pthread_getattr_np() calls need to get passed the size
611 requested in the attribute, regardless of how large the
612 actually used guardsize is. */
613 pd
->reported_guardsize
= guardsize
;
616 /* Initialize the lock. We have to do this unconditionally since the
617 stillborn thread could be canceled while the lock is taken. */
618 pd
->lock
= LLL_LOCK_INITIALIZER
;
620 /* We place the thread descriptor at the end of the stack. */
624 /* The stack begins before the TCB and the static TLS block. */
625 stacktop
= ((char *) (pd
+ 1) - __static_tls_size
);
627 stacktop
= (char *) (pd
- 1);
630 #ifdef NEED_SEPARATE_REGISTER_STACK
631 *stack
= pd
->stackblock
;
632 *stacksize
= stacktop
- *stack
;
643 __deallocate_stack (struct pthread
*pd
)
645 lll_lock (stack_cache_lock
);
647 /* Remove the thread from the list of threads with user defined
649 list_del (&pd
->list
);
651 /* Not much to do. Just free the mmap()ed memory. Note that we do
652 not reset the 'used' flag in the 'tid' field. This is done by
653 the kernel. If no thread has been created yet this field is
655 if (__builtin_expect (! pd
->user_stack
, 1))
656 (void) queue_stack (pd
);
658 /* Free the memory associated with the ELF TLS. */
659 _dl_deallocate_tls (TLS_TPADJ (pd
), false);
661 lll_unlock (stack_cache_lock
);
667 __make_stacks_executable (void **stack_endp
)
669 /* First the main thread's stack. */
670 int err
= _dl_make_stack_executable (stack_endp
);
674 #ifdef NEED_SEPARATE_REGISTER_STACK
675 const size_t pagemask
= ~(__getpagesize () - 1);
678 lll_lock (stack_cache_lock
);
681 list_for_each (runp
, &stack_used
)
683 err
= change_stack_perm (list_entry (runp
, struct pthread
, list
)
684 #ifdef NEED_SEPARATE_REGISTER_STACK
692 /* Also change the permission for the currently unused stacks. This
693 might be wasted time but better spend it here than adding a check
696 list_for_each (runp
, &stack_cache
)
698 err
= change_stack_perm (list_entry (runp
, struct pthread
, list
)
699 #ifdef NEED_SEPARATE_REGISTER_STACK
707 lll_unlock (stack_cache_lock
);
713 /* In case of a fork() call the memory allocation in the child will be
714 the same but only one thread is running. All stacks except that of
715 the one running thread are not used anymore. We have to recycle
718 __reclaim_stacks (void)
720 struct pthread
*self
= (struct pthread
*) THREAD_SELF
;
722 /* No locking necessary. The caller is the only stack in use. */
724 /* Mark all stacks except the still running one as free. */
726 list_for_each (runp
, &stack_used
)
728 struct pthread
*curp
;
730 curp
= list_entry (runp
, struct pthread
, list
);
733 /* This marks the stack as free. */
736 /* The PID field must be initialized for the new process. */
737 curp
->pid
= self
->pid
;
739 /* Account for the size of the stack. */
740 stack_cache_actsize
+= curp
->stackblock_size
;
744 /* Add the stack of all running threads to the cache. */
745 list_splice (&stack_used
, &stack_cache
);
747 /* Remove the entry for the current thread from the cache list
748 and add it to the list of running threads. Which of the two
749 lists is decided by the user_stack flag. */
750 list_del (&self
->list
);
752 /* Re-initialize the lists for all the threads. */
753 INIT_LIST_HEAD (&stack_used
);
754 INIT_LIST_HEAD (&__stack_user
);
756 if (__builtin_expect (THREAD_GETMEM (self
, user_stack
), 0))
757 list_add (&self
->list
, &__stack_user
);
759 list_add (&self
->list
, &stack_used
);
761 /* There is one thread running. */
764 /* Initialize the lock. */
765 stack_cache_lock
= LLL_LOCK_INITIALIZER
;
770 /* Find a thread given the thread ID. Two lists are scanned while
   stack_cache_lock is held: first stack_used, then __stack_user. A
   descriptor matches when its 'tid' field equals TID. RESULT starts
   out as NULL.
   NOTE(review): presumably RESULT (the match, or NULL when nothing
   matched) is what the function returns -- confirm against the
   return statement. */
773 __find_thread_by_id (pid_t tid
)
775 struct pthread
*result
= NULL
;
/* The lists are only walked with the cache lock held. */
777 lll_lock (stack_cache_lock
);
779 /* Iterate over the list with system-allocated threads first. */
781 list_for_each (runp
, &stack_used
)
783 struct pthread
*curp
;
/* Map the list node back to the thread descriptor embedding it. */
785 curp
= list_entry (runp
, struct pthread
, list
);
787 if (curp
->tid
== tid
)
794 /* Now the list with threads using user-allocated stacks. */
795 list_for_each (runp
, &__stack_user
)
797 struct pthread
*curp
;
/* Map the list node back to the thread descriptor embedding it. */
799 curp
= list_entry (runp
, struct pthread
, list
);
801 if (curp
->tid
== tid
)
809 lll_unlock (stack_cache_lock
);
/* Set up the static TLS block of the module MAP for the thread
   described by CURP: compute the address DEST of the block relative
   to CURP, store DEST in the thread's DTV slot indexed by
   MAP->l_tls_modid, copy the l_tls_initimage_size bytes of the
   initialization image to DEST and zero the remaining
   l_tls_blocksize - l_tls_initimage_size bytes. */
815 static inline void __attribute__((always_inline
))
816 init_one_static_tls (struct pthread
*curp
, struct link_map
*map
)
818 dtv_t
*dtv
= GET_DTV (TLS_TPADJ (curp
));
/* NOTE(review): two alternative definitions of DEST follow, one
   below CURP and one above it. Presumably they are selected by
   TLS_TCB_AT_TP and TLS_DTV_AT_TP respectively (see the error
   message below) -- confirm against the preprocessor conditionals. */
820 void *dest
= (char *) curp
- map
->l_tls_offset
;
822 void *dest
= (char *) curp
+ map
->l_tls_offset
+ TLS_PRE_TCB_SIZE
;
824 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
827 /* Fill in the DTV slot so that a later LD/GD access will find it. */
828 dtv
[map
->l_tls_modid
].pointer
= dest
;
830 /* Initialize the memory. __mempcpy's result is used as the start of
   the region to clear, so the zero fill follows the copied image. */
831 memset (__mempcpy (dest
, map
->l_tls_initimage
, map
->l_tls_initimage_size
),
832 '\0', map
->l_tls_blocksize
- map
->l_tls_initimage_size
);
/* Initialize the static TLS block of the module MAP in every thread
   found on the stack_used and __stack_user lists by calling
   init_one_static_tls for each descriptor. Both lists are walked
   with stack_cache_lock held. */
837 __pthread_init_static_tls (struct link_map
*map
)
839 lll_lock (stack_cache_lock
);
841 /* Iterate over the list with system-allocated threads first. */
843 list_for_each (runp
, &stack_used
)
844 init_one_static_tls (list_entry (runp
, struct pthread
, list
), map
);
846 /* Now the list with threads using user-allocated stacks. */
847 list_for_each (runp
, &__stack_user
)
848 init_one_static_tls (list_entry (runp
, struct pthread
, list
), map
);
850 lll_unlock (stack_cache_lock
);