Test for stack alignment.
[glibc.git] / sysdeps / generic / dl-tls.c
blob3382e3493cf6eca6c75b0865a50daf0d5e3c0ebd
1 /* Thread-local storage handling in the ELF dynamic linker. Generic version.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
20 #include <assert.h>
21 #include <signal.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <sys/param.h>
26 #include <tls.h>
28 /* We don't need any of this if TLS is not supported. */
29 #ifdef USE_TLS
31 # include <dl-tls.h>
32 # include <ldsodefs.h>
/* Amount of excess space to allocate in the static TLS area
   to allow dynamic loading of modules defining IE-model TLS data.
   The replacement list is parenthesized so that the macro always
   expands as a single operand, whatever expression it is used in.  */
# define TLS_STATIC_SURPLUS     (64 + DL_NNS * 100)

/* Value used for dtv entries for which the allocation is delayed.  */
# define TLS_DTV_UNALLOCATED    ((void *) -1l)
42 /* Out-of-memory handler. */
43 # ifdef SHARED
/* Report that memory for thread-local data could not be obtained.
   The message is passed to _dl_fatal_printf; the function is declared
   noreturn, so callers do not need to handle a return from it.  */
static void
__attribute__ ((__noreturn__))
oom (void)
{
  _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
}
50 # endif
54 size_t
55 internal_function
56 _dl_next_tls_modid (void)
58 size_t result;
60 if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
62 size_t disp = 0;
63 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
65 /* Note that this branch will never be executed during program
66 start since there are no gaps at that time. Therefore it
67 does not matter that the dl_tls_dtv_slotinfo is not allocated
68 yet when the function is called for the first times. */
69 result = GL(dl_tls_static_nelem) + 1;
70 /* If the following would not be true we mustn't have assumed
71 there is a gap. */
72 assert (result <= GL(dl_tls_max_dtv_idx));
75 while (result - disp < runp->len)
77 if (runp->slotinfo[result - disp].map == NULL)
78 break;
80 ++result;
81 assert (result <= GL(dl_tls_max_dtv_idx) + 1);
84 if (result - disp < runp->len)
85 break;
87 disp += runp->len;
89 while ((runp = runp->next) != NULL);
91 if (result >= GL(dl_tls_max_dtv_idx))
93 /* The new index must indeed be exactly one higher than the
94 previous high. */
95 assert (result == GL(dl_tls_max_dtv_idx));
97 /* There is no gap anymore. */
98 GL(dl_tls_dtv_gaps) = false;
100 goto nogaps;
103 else
105 /* No gaps, allocate a new entry. */
106 nogaps:
107 result = ++GL(dl_tls_max_dtv_idx);
110 return result;
113 # ifdef SHARED
115 void
116 internal_function
117 _dl_determine_tlsoffset (void)
119 struct dtv_slotinfo *slotinfo;
120 size_t max_align = TLS_TCB_ALIGN;
121 size_t offset, freetop = 0, freebottom = 0;
122 size_t cnt;
124 /* The first element of the dtv slot info list is allocated. */
125 assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
126 /* There is at this point only one element in the
127 dl_tls_dtv_slotinfo_list list. */
128 assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
130 slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
132 /* Determining the offset of the various parts of the static TLS
133 block has several dependencies. In addition we have to work
134 around bugs in some toolchains.
136 Each TLS block from the objects available at link time has a size
137 and an alignment requirement. The GNU ld computes the alignment
138 requirements for the data at the positions *in the file*, though.
139 I.e, it is not simply possible to allocate a block with the size
140 of the TLS program header entry. The data is layed out assuming
141 that the first byte of the TLS block fulfills
143 p_vaddr mod p_align == &TLS_BLOCK mod p_align
145 This means we have to add artificial padding at the beginning of
146 the TLS block. These bytes are never used for the TLS data in
147 this module but the first byte allocated must be aligned
148 according to mod p_align == 0 so that the first byte of the TLS
149 block is aligned according to p_vaddr mod p_align. This is ugly
150 and the linker can help by computing the offsets in the TLS block
151 assuming the first byte of the TLS block is aligned according to
152 p_align.
154 The extra space which might be allocated before the first byte of
155 the TLS block need not go unused. The code below tries to use
156 that memory for the next TLS block. This can work if the total
157 memory requirement for the next TLS block is smaller than the
158 gap. */
160 # if TLS_TCB_AT_TP
161 /* We simply start with zero. */
162 offset = 0;
164 for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
166 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
168 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
169 & (slotinfo[cnt].map->l_tls_align - 1));
170 size_t off;
171 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
173 if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
175 off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
176 - firstbyte, slotinfo[cnt].map->l_tls_align)
177 + firstbyte;
178 if (off <= freebottom)
180 freetop = off;
182 /* XXX For some architectures we perhaps should store the
183 negative offset. */
184 slotinfo[cnt].map->l_tls_offset = off;
185 continue;
189 off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
190 slotinfo[cnt].map->l_tls_align) + firstbyte;
191 if (off > offset + slotinfo[cnt].map->l_tls_blocksize
192 + (freebottom - freetop))
194 freetop = offset;
195 freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
197 offset = off;
199 /* XXX For some architectures we perhaps should store the
200 negative offset. */
201 slotinfo[cnt].map->l_tls_offset = off;
204 GL(dl_tls_static_used) = offset;
205 GL(dl_tls_static_size) = (roundup (offset + TLS_STATIC_SURPLUS, max_align)
206 + TLS_TCB_SIZE);
207 # elif TLS_DTV_AT_TP
208 /* The TLS blocks start right after the TCB. */
209 offset = TLS_TCB_SIZE;
211 for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
213 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
215 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
216 & (slotinfo[cnt].map->l_tls_align - 1));
217 size_t off;
218 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
220 if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
222 off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
223 if (off - freebottom < firstbyte)
224 off += slotinfo[cnt].map->l_tls_align;
225 if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
227 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
228 freebottom = off + slotinfo[cnt].map->l_tls_blocksize
229 - firstbyte;
230 continue;
234 off = roundup (offset, slotinfo[cnt].map->l_tls_align);
235 if (off - offset < firstbyte)
236 off += slotinfo[cnt].map->l_tls_align;
238 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
239 if (off - firstbyte - offset > freetop - freebottom)
241 freebottom = offset;
242 freetop = off - firstbyte;
245 offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
248 GL(dl_tls_static_used) = offset;
249 GL(dl_tls_static_size) = roundup (offset + TLS_STATIC_SURPLUS,
250 TLS_TCB_ALIGN);
251 # else
252 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
253 # endif
255 /* The alignment requirement for the static TLS block. */
256 GL(dl_tls_static_align) = max_align;
260 /* This is called only when the data structure setup was skipped at startup,
261 when there was no need for it then. Now we have dynamically loaded
262 something needing TLS, or libpthread needs it. */
264 internal_function
265 _dl_tls_setup (void)
267 assert (GL(dl_tls_dtv_slotinfo_list) == NULL);
268 assert (GL(dl_tls_max_dtv_idx) == 0);
270 const size_t nelem = 2 + TLS_SLOTINFO_SURPLUS;
272 GL(dl_tls_dtv_slotinfo_list)
273 = calloc (1, (sizeof (struct dtv_slotinfo_list)
274 + nelem * sizeof (struct dtv_slotinfo)));
275 if (GL(dl_tls_dtv_slotinfo_list) == NULL)
276 return -1;
278 GL(dl_tls_dtv_slotinfo_list)->len = nelem;
280 /* Number of elements in the static TLS block. It can't be zero
281 because of various assumptions. The one element is null. */
282 GL(dl_tls_static_nelem) = GL(dl_tls_max_dtv_idx) = 1;
284 /* This initializes more variables for us. */
285 _dl_determine_tlsoffset ();
287 return 0;
289 rtld_hidden_def (_dl_tls_setup)
290 # endif
292 static void *
293 internal_function
294 allocate_dtv (void *result)
296 dtv_t *dtv;
297 size_t dtv_length;
299 /* We allocate a few more elements in the dtv than are needed for the
300 initial set of modules. This should avoid in most cases expansions
301 of the dtv. */
302 dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
303 dtv = calloc (dtv_length + 2, sizeof (dtv_t));
304 if (dtv != NULL)
306 /* This is the initial length of the dtv. */
307 dtv[0].counter = dtv_length;
309 /* The rest of the dtv (including the generation counter) is
310 Initialize with zero to indicate nothing there. */
312 /* Add the dtv to the thread data structures. */
313 INSTALL_DTV (result, dtv);
315 else
316 result = NULL;
318 return result;
322 /* Get size and alignment requirements of the static TLS block. */
323 void
324 internal_function
325 _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
327 *sizep = GL(dl_tls_static_size);
328 *alignp = GL(dl_tls_static_align);
332 void *
333 internal_function
334 _dl_allocate_tls_storage (void)
336 void *result;
337 size_t size = GL(dl_tls_static_size);
339 # if TLS_DTV_AT_TP
340 /* Memory layout is:
341 [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
342 ^ This should be returned. */
343 size += (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
344 & ~(GL(dl_tls_static_align) - 1);
345 # endif
347 /* Allocate a correctly aligned chunk of memory. */
348 result = __libc_memalign (GL(dl_tls_static_align), size);
349 if (__builtin_expect (result != NULL, 1))
351 /* Allocate the DTV. */
352 void *allocated = result;
354 # if TLS_TCB_AT_TP
355 /* The TCB follows the TLS blocks. */
356 result = (char *) result + size - TLS_TCB_SIZE;
358 /* Clear the TCB data structure. We can't ask the caller (i.e.
359 libpthread) to do it, because we will initialize the DTV et al. */
360 memset (result, 0, TLS_TCB_SIZE);
361 # elif TLS_DTV_AT_TP
362 result = (char *) result + size - GL(dl_tls_static_size);
364 /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
365 We can't ask the caller (i.e. libpthread) to do it, because we will
366 initialize the DTV et al. */
367 memset ((char *) result - TLS_PRE_TCB_SIZE, 0,
368 TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
369 # endif
371 result = allocate_dtv (result);
372 if (result == NULL)
373 free (allocated);
376 return result;
380 void *
381 internal_function
382 _dl_allocate_tls_init (void *result)
384 if (result == NULL)
385 /* The memory allocation failed. */
386 return NULL;
388 dtv_t *dtv = GET_DTV (result);
389 struct dtv_slotinfo_list *listp;
390 size_t total = 0;
392 /* We have to look prepare the dtv for all currently loaded
393 modules using TLS. For those which are dynamically loaded we
394 add the values indicating deferred allocation. */
395 listp = GL(dl_tls_dtv_slotinfo_list);
396 while (1)
398 size_t cnt;
400 for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
402 struct link_map *map;
403 void *dest;
405 /* Check for the total number of used slots. */
406 if (total + cnt > GL(dl_tls_max_dtv_idx))
407 break;
409 map = listp->slotinfo[cnt].map;
410 if (map == NULL)
411 /* Unused entry. */
412 continue;
414 if (map->l_tls_offset == NO_TLS_OFFSET)
416 /* For dynamically loaded modules we simply store
417 the value indicating deferred allocation. */
418 dtv[map->l_tls_modid].pointer = TLS_DTV_UNALLOCATED;
419 continue;
422 assert (map->l_tls_modid == cnt);
423 assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
424 # if TLS_TCB_AT_TP
425 assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
426 dest = (char *) result - map->l_tls_offset;
427 # elif TLS_DTV_AT_TP
428 dest = (char *) result + map->l_tls_offset;
429 # else
430 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
431 # endif
433 /* Copy the initialization image and clear the BSS part. */
434 dtv[map->l_tls_modid].pointer = dest;
435 memset (__mempcpy (dest, map->l_tls_initimage,
436 map->l_tls_initimage_size), '\0',
437 map->l_tls_blocksize - map->l_tls_initimage_size);
440 total += cnt;
441 if (total >= GL(dl_tls_max_dtv_idx))
442 break;
444 listp = listp->next;
445 assert (listp != NULL);
448 return result;
450 rtld_hidden_def (_dl_allocate_tls_init)
452 void *
453 internal_function
454 _dl_allocate_tls (void *mem)
456 return _dl_allocate_tls_init (mem == NULL
457 ? _dl_allocate_tls_storage ()
458 : allocate_dtv (mem));
460 rtld_hidden_def (_dl_allocate_tls)
463 void
464 internal_function
465 _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
467 dtv_t *dtv = GET_DTV (tcb);
469 /* The array starts with dtv[-1]. */
470 #ifdef SHARED
471 if (dtv != GL(dl_initial_dtv))
472 #endif
473 free (dtv - 1);
475 if (dealloc_tcb)
477 # if TLS_TCB_AT_TP
478 /* The TCB follows the TLS blocks. Back up to free the whole block. */
479 tcb -= GL(dl_tls_static_size) - TLS_TCB_SIZE;
480 # elif TLS_DTV_AT_TP
481 /* Back up the TLS_PRE_TCB_SIZE bytes. */
482 tcb -= (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
483 & ~(GL(dl_tls_static_align) - 1);
484 # endif
485 free (tcb);
488 rtld_hidden_def (_dl_deallocate_tls)
491 # ifdef SHARED
/* The __tls_get_addr function has two basic forms which differ in the
   arguments.  The IA-64 form takes two parameters, the module ID and
   offset.  The form used, among others, on IA-32 takes a reference to
   a special structure which contains the same information.  The second
   form seems to be more often used (at the moment) so we default to
   it.  Users of the IA-64 form have to provide adequate definitions
   of the following macros.  */
#  ifndef GET_ADDR_ARGS
#   define GET_ADDR_ARGS tls_index *ti
#  endif
#  ifndef GET_ADDR_MODULE
#   define GET_ADDR_MODULE ti->ti_module
#  endif
#  ifndef GET_ADDR_OFFSET
#   define GET_ADDR_OFFSET ti->ti_offset
#  endif
510 static void *
511 allocate_and_init (struct link_map *map)
513 void *newp;
515 newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
516 if (newp == NULL)
517 oom ();
519 /* Initialize the memory. */
520 memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
521 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
523 return newp;
527 /* The generic dynamic and local dynamic model cannot be used in
528 statically linked applications. */
529 void *
530 __tls_get_addr (GET_ADDR_ARGS)
532 dtv_t *dtv = THREAD_DTV ();
533 struct link_map *the_map = NULL;
534 void *p;
536 if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
538 struct dtv_slotinfo_list *listp;
539 size_t idx;
541 /* The global dl_tls_dtv_slotinfo array contains for each module
542 index the generation counter current when the entry was
543 created. This array never shrinks so that all module indices
544 which were valid at some time can be used to access it.
545 Before the first use of a new module index in this function
546 the array was extended appropriately. Access also does not
547 have to be guarded against modifications of the array. It is
548 assumed that pointer-size values can be read atomically even
549 in SMP environments. It is possible that other threads at
550 the same time dynamically load code and therefore add to the
551 slotinfo list. This is a problem since we must not pick up
552 any information about incomplete work. The solution to this
553 is to ignore all dtv slots which were created after the one
554 we are currently interested. We know that dynamic loading
555 for this module is completed and this is the last load
556 operation we know finished. */
557 idx = GET_ADDR_MODULE;
558 listp = GL(dl_tls_dtv_slotinfo_list);
559 while (idx >= listp->len)
561 idx -= listp->len;
562 listp = listp->next;
565 if (dtv[0].counter < listp->slotinfo[idx].gen)
567 /* The generation counter for the slot is higher than what
568 the current dtv implements. We have to update the whole
569 dtv but only those entries with a generation counter <=
570 the one for the entry we need. */
571 size_t new_gen = listp->slotinfo[idx].gen;
572 size_t total = 0;
574 /* We have to look through the entire dtv slotinfo list. */
575 listp = GL(dl_tls_dtv_slotinfo_list);
578 size_t cnt;
580 for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
582 size_t gen = listp->slotinfo[cnt].gen;
583 struct link_map *map;
584 size_t modid;
586 if (gen > new_gen)
587 /* This is a slot for a generation younger than
588 the one we are handling now. It might be
589 incompletely set up so ignore it. */
590 continue;
592 /* If the entry is older than the current dtv layout
593 we know we don't have to handle it. */
594 if (gen <= dtv[0].counter)
595 continue;
597 /* If there is no map this means the entry is empty. */
598 map = listp->slotinfo[cnt].map;
599 if (map == NULL)
601 /* If this modid was used at some point the memory
602 might still be allocated. */
603 if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
605 free (dtv[total + cnt].pointer);
606 dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
609 continue;
612 /* Check whether the current dtv array is large enough. */
613 modid = map->l_tls_modid;
614 assert (total + cnt == modid);
615 if (dtv[-1].counter < modid)
617 /* Reallocate the dtv. */
618 dtv_t *newp;
619 size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
620 size_t oldsize = dtv[-1].counter;
622 assert (map->l_tls_modid <= newsize);
624 if (dtv == GL(dl_initial_dtv))
626 /* This is the initial dtv that was allocated
627 during rtld startup using the dl-minimal.c
628 malloc instead of the real malloc. We can't
629 free it, we have to abandon the old storage. */
631 newp = malloc ((2 + newsize) * sizeof (dtv_t));
632 if (newp == NULL)
633 oom ();
634 memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
636 else
638 newp = realloc (&dtv[-1],
639 (2 + newsize) * sizeof (dtv_t));
640 if (newp == NULL)
641 oom ();
644 newp[0].counter = newsize;
646 /* Clear the newly allocated part. */
647 memset (newp + 2 + oldsize, '\0',
648 (newsize - oldsize) * sizeof (dtv_t));
650 /* Point dtv to the generation counter. */
651 dtv = &newp[1];
653 /* Install this new dtv in the thread data
654 structures. */
655 INSTALL_NEW_DTV (dtv);
658 /* If there is currently memory allocate for this
659 dtv entry free it. */
660 /* XXX Ideally we will at some point create a memory
661 pool. */
662 if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
663 /* Note that free is called for NULL is well. We
664 deallocate even if it is this dtv entry we are
665 supposed to load. The reason is that we call
666 memalign and not malloc. */
667 free (dtv[modid].pointer);
669 /* This module is loaded dynamically- We defer
670 memory allocation. */
671 dtv[modid].pointer = TLS_DTV_UNALLOCATED;
673 if (modid == GET_ADDR_MODULE)
674 the_map = map;
677 total += listp->len;
679 while ((listp = listp->next) != NULL);
681 /* This will be the new maximum generation counter. */
682 dtv[0].counter = new_gen;
686 p = dtv[GET_ADDR_MODULE].pointer;
688 if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
690 /* The allocation was deferred. Do it now. */
691 if (the_map == NULL)
693 /* Find the link map for this module. */
694 size_t idx = GET_ADDR_MODULE;
695 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
697 while (idx >= listp->len)
699 idx -= listp->len;
700 listp = listp->next;
703 the_map = listp->slotinfo[idx].map;
706 p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
709 return (char *) p + GET_ADDR_OFFSET;
711 # endif
713 #endif /* use TLS */