1 /* Thread-local storage handling in the ELF dynamic linker. Generic version.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 #include <sys/param.h>
28 /* We don't need any of this if TLS is not supported. */
32 # include <ldsodefs.h>
34 /* Amount of excess space to allocate in the static TLS area
35 to allow dynamic loading of modules defining IE-model TLS data. */
36 # define TLS_STATIC_SURPLUS 64
38 /* Value used for dtv entries for which the allocation is delayed. */
39 # define TLS_DTV_UNALLOCATED ((void *) -1l)
42 /* Out-of-memory handler. */
45 __attribute__ ((__noreturn__
))
48 _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
56 _dl_next_tls_modid (void)
60 if (__builtin_expect (GL(dl_tls_dtv_gaps
), false))
63 struct dtv_slotinfo_list
*runp
= GL(dl_tls_dtv_slotinfo_list
);
65 /* Note that this branch will never be executed during program
66 start since there are no gaps at that time. Therefore it
67 does not matter that the dl_tls_dtv_slotinfo is not allocated
68 yet when the function is called for the first time. */
69 result
= GL(dl_tls_static_nelem
) + 1;
70 /* If the following would not be true we mustn't have assumed
72 assert (result
<= GL(dl_tls_max_dtv_idx
));
75 while (result
- disp
< runp
->len
)
77 if (runp
->slotinfo
[result
- disp
].map
== NULL
)
81 assert (result
<= GL(dl_tls_max_dtv_idx
) + 1);
84 if (result
- disp
< runp
->len
)
89 while ((runp
= runp
->next
) != NULL
);
91 if (result
>= GL(dl_tls_max_dtv_idx
))
93 /* The new index must indeed be exactly one higher than the
95 assert (result
== GL(dl_tls_max_dtv_idx
));
97 /* There is no gap anymore. */
98 GL(dl_tls_dtv_gaps
) = false;
105 /* No gaps, allocate a new entry. */
107 result
= ++GL(dl_tls_max_dtv_idx
);
117 _dl_determine_tlsoffset (void)
119 struct dtv_slotinfo
*slotinfo
;
120 size_t max_align
= TLS_TCB_ALIGN
;
121 size_t offset
, freetop
= 0, freebottom
= 0;
124 /* The first element of the dtv slot info list is allocated. */
125 assert (GL(dl_tls_dtv_slotinfo_list
) != NULL
);
126 /* There is at this point only one element in the
127 dl_tls_dtv_slotinfo_list list. */
128 assert (GL(dl_tls_dtv_slotinfo_list
)->next
== NULL
);
130 slotinfo
= GL(dl_tls_dtv_slotinfo_list
)->slotinfo
;
132 /* Determining the offset of the various parts of the static TLS
133 block has several dependencies. In addition we have to work
134 around bugs in some toolchains.
136 Each TLS block from the objects available at link time has a size
137 and an alignment requirement. The GNU ld computes the alignment
138 requirements for the data at the positions *in the file*, though.
139 I.e., it is not possible to simply allocate a block with the size
140 of the TLS program header entry. The data is laid out assuming
141 that the first byte of the TLS block fulfills
143 p_vaddr mod p_align == &TLS_BLOCK mod p_align
145 This means we have to add artificial padding at the beginning of
146 the TLS block. These bytes are never used for the TLS data in
147 this module but the first byte allocated must be aligned
148 according to mod p_align == 0 so that the first byte of the TLS
149 block is aligned according to p_vaddr mod p_align. This is ugly
150 and the linker can help by computing the offsets in the TLS block
151 assuming the first byte of the TLS block is aligned according to
154 The extra space which might be allocated before the first byte of
155 the TLS block need not go unused. The code below tries to use
156 that memory for the next TLS block. This can work if the total
157 memory requirement for the next TLS block is smaller than the
161 /* We simply start with zero. */
164 for (cnt
= 1; slotinfo
[cnt
].map
!= NULL
; ++cnt
)
166 assert (cnt
< GL(dl_tls_dtv_slotinfo_list
)->len
);
168 size_t firstbyte
= (-slotinfo
[cnt
].map
->l_tls_firstbyte_offset
169 & (slotinfo
[cnt
].map
->l_tls_align
- 1));
171 max_align
= MAX (max_align
, slotinfo
[cnt
].map
->l_tls_align
);
173 if (freebottom
- freetop
>= slotinfo
[cnt
].map
->l_tls_blocksize
)
175 off
= roundup (freetop
+ slotinfo
[cnt
].map
->l_tls_blocksize
176 - firstbyte
, slotinfo
[cnt
].map
->l_tls_align
)
178 if (off
<= freebottom
)
182 /* XXX For some architectures we perhaps should store the
184 slotinfo
[cnt
].map
->l_tls_offset
= off
;
189 off
= roundup (offset
+ slotinfo
[cnt
].map
->l_tls_blocksize
- firstbyte
,
190 slotinfo
[cnt
].map
->l_tls_align
) + firstbyte
;
191 if (off
> offset
+ slotinfo
[cnt
].map
->l_tls_blocksize
192 + (freebottom
- freetop
))
195 freebottom
= off
- slotinfo
[cnt
].map
->l_tls_blocksize
;
199 /* XXX For some architectures we perhaps should store the
201 slotinfo
[cnt
].map
->l_tls_offset
= off
;
204 GL(dl_tls_static_used
) = offset
;
205 GL(dl_tls_static_size
) = (roundup (offset
+ TLS_STATIC_SURPLUS
, max_align
)
208 /* The TLS blocks start right after the TCB. */
209 offset
= TLS_TCB_SIZE
;
211 for (cnt
= 1; slotinfo
[cnt
].map
!= NULL
; ++cnt
)
213 assert (cnt
< GL(dl_tls_dtv_slotinfo_list
)->len
);
215 size_t firstbyte
= (-slotinfo
[cnt
].map
->l_tls_firstbyte_offset
216 & (slotinfo
[cnt
].map
->l_tls_align
- 1));
218 max_align
= MAX (max_align
, slotinfo
[cnt
].map
->l_tls_align
);
220 if (slotinfo
[cnt
].map
->l_tls_blocksize
>= freetop
- freebottom
)
222 off
= roundup (freebottom
, slotinfo
[cnt
].map
->l_tls_align
);
223 if (off
- freebottom
< firstbyte
)
224 off
+= slotinfo
[cnt
].map
->l_tls_align
;
225 if (off
+ slotinfo
[cnt
].map
->l_tls_blocksize
- firstbyte
<= freetop
)
227 slotinfo
[cnt
].map
->l_tls_offset
= off
- firstbyte
;
228 freebottom
= off
+ slotinfo
[cnt
].map
->l_tls_blocksize
234 off
= roundup (offset
, slotinfo
[cnt
].map
->l_tls_align
);
235 if (off
- offset
< firstbyte
)
236 off
+= slotinfo
[cnt
].map
->l_tls_align
;
238 slotinfo
[cnt
].map
->l_tls_offset
= off
- firstbyte
;
239 if (off
- firstbyte
- offset
> freetop
- freebottom
)
242 freetop
= off
- firstbyte
;
245 offset
= off
+ slotinfo
[cnt
].map
->l_tls_blocksize
- firstbyte
;
248 GL(dl_tls_static_used
) = offset
;
249 GL(dl_tls_static_size
) = roundup (offset
+ TLS_STATIC_SURPLUS
,
252 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
255 /* The alignment requirement for the static TLS block. */
256 GL(dl_tls_static_align
) = max_align
;
260 /* This is called only when the data structure setup was skipped at startup,
261 when there was no need for it then. Now we have dynamically loaded
262 something needing TLS, or libpthread needs it. */
267 assert (GL(dl_tls_dtv_slotinfo_list
) == NULL
);
268 assert (GL(dl_tls_max_dtv_idx
) == 0);
270 const size_t nelem
= 2 + TLS_SLOTINFO_SURPLUS
;
272 GL(dl_tls_dtv_slotinfo_list
)
273 = calloc (1, (sizeof (struct dtv_slotinfo_list
)
274 + nelem
* sizeof (struct dtv_slotinfo
)));
275 if (GL(dl_tls_dtv_slotinfo_list
) == NULL
)
278 GL(dl_tls_dtv_slotinfo_list
)->len
= nelem
;
280 /* Number of elements in the static TLS block. It can't be zero
281 because of various assumptions. The one element is null. */
282 GL(dl_tls_static_nelem
) = GL(dl_tls_max_dtv_idx
) = 1;
284 /* This initializes more variables for us. */
285 _dl_determine_tlsoffset ();
289 rtld_hidden_def (_dl_tls_setup
)
294 allocate_dtv (void *result
)
299 /* We allocate a few more elements in the dtv than are needed for the
300 initial set of modules. This should avoid in most cases expansions
302 dtv_length
= GL(dl_tls_max_dtv_idx
) + DTV_SURPLUS
;
303 dtv
= calloc (dtv_length
+ 2, sizeof (dtv_t
));
306 /* This is the initial length of the dtv. */
307 dtv
[0].counter
= dtv_length
;
309 /* The rest of the dtv (including the generation counter) is
310 initialized with zero to indicate nothing there. */
312 /* Add the dtv to the thread data structures. */
313 INSTALL_DTV (result
, dtv
);
322 /* Get size and alignment requirements of the static TLS block. */
325 _dl_get_tls_static_info (size_t *sizep
, size_t *alignp
)
327 *sizep
= GL(dl_tls_static_size
);
328 *alignp
= GL(dl_tls_static_align
);
334 _dl_allocate_tls_storage (void)
337 size_t size
= GL(dl_tls_static_size
);
341 [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
342 ^ This should be returned. */
343 size
+= (TLS_PRE_TCB_SIZE
+ GL(dl_tls_static_align
) - 1)
344 & ~(GL(dl_tls_static_align
) - 1);
347 /* Allocate a correctly aligned chunk of memory. */
348 result
= __libc_memalign (GL(dl_tls_static_align
), size
);
349 if (__builtin_expect (result
!= NULL
, 1))
351 /* Allocate the DTV. */
352 void *allocated
= result
;
355 /* The TCB follows the TLS blocks. */
356 result
= (char *) result
+ size
- TLS_TCB_SIZE
;
358 /* Clear the TCB data structure. We can't ask the caller (i.e.
359 libpthread) to do it, because we will initialize the DTV et al. */
360 memset (result
, 0, TLS_TCB_SIZE
);
362 result
= (char *) result
+ size
- GL(dl_tls_static_size
);
364 /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
365 We can't ask the caller (i.e. libpthread) to do it, because we will
366 initialize the DTV et al. */
367 memset ((char *) result
- TLS_PRE_TCB_SIZE
, 0,
368 TLS_PRE_TCB_SIZE
+ TLS_TCB_SIZE
);
371 result
= allocate_dtv (result
);
382 _dl_allocate_tls_init (void *result
)
385 /* The memory allocation failed. */
388 dtv_t
*dtv
= GET_DTV (result
);
389 struct dtv_slotinfo_list
*listp
;
392 /* We have to prepare the dtv for all currently loaded
393 modules using TLS. For those which are dynamically loaded we
394 add the values indicating deferred allocation. */
395 listp
= GL(dl_tls_dtv_slotinfo_list
);
400 for (cnt
= total
== 0 ? 1 : 0; cnt
< listp
->len
; ++cnt
)
402 struct link_map
*map
;
405 /* Check for the total number of used slots. */
406 if (total
+ cnt
> GL(dl_tls_max_dtv_idx
))
409 map
= listp
->slotinfo
[cnt
].map
;
414 if (map
->l_tls_offset
== NO_TLS_OFFSET
)
416 /* For dynamically loaded modules we simply store
417 the value indicating deferred allocation. */
418 dtv
[map
->l_tls_modid
].pointer
= TLS_DTV_UNALLOCATED
;
422 assert (map
->l_tls_modid
== cnt
);
423 assert (map
->l_tls_blocksize
>= map
->l_tls_initimage_size
);
425 assert ((size_t) map
->l_tls_offset
>= map
->l_tls_blocksize
);
426 dest
= (char *) result
- map
->l_tls_offset
;
428 dest
= (char *) result
+ map
->l_tls_offset
;
430 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
433 /* Copy the initialization image and clear the BSS part. */
434 dtv
[map
->l_tls_modid
].pointer
= dest
;
435 memset (__mempcpy (dest
, map
->l_tls_initimage
,
436 map
->l_tls_initimage_size
), '\0',
437 map
->l_tls_blocksize
- map
->l_tls_initimage_size
);
441 if (total
>= GL(dl_tls_max_dtv_idx
))
445 assert (listp
!= NULL
);
450 rtld_hidden_def (_dl_allocate_tls_init
)
454 _dl_allocate_tls (void *mem
)
456 return _dl_allocate_tls_init (mem
== NULL
457 ? _dl_allocate_tls_storage ()
458 : allocate_dtv (mem
));
460 rtld_hidden_def (_dl_allocate_tls
)
465 _dl_deallocate_tls (void *tcb
, bool dealloc_tcb
)
467 dtv_t
*dtv
= GET_DTV (tcb
);
469 /* The array starts with dtv[-1]. */
471 if (dtv
!= GL(dl_initial_dtv
))
478 /* The TCB follows the TLS blocks. Back up to free the whole block. */
479 tcb
-= GL(dl_tls_static_size
) - TLS_TCB_SIZE
;
481 /* Back up the TLS_PRE_TCB_SIZE bytes. */
482 tcb
-= (TLS_PRE_TCB_SIZE
+ GL(dl_tls_static_align
) - 1)
483 & ~(GL(dl_tls_static_align
) - 1);
488 rtld_hidden_def (_dl_deallocate_tls
)
492 /* The __tls_get_addr function has two basic forms which differ in the
493 arguments. The IA-64 form takes two parameters, the module ID and
494 offset. The form used, among others, on IA-32 takes a reference to
495 a special structure which contains the same information. The second
496 form seems to be more often used (at the moment) so we default to
497 it. Users of the IA-64 form have to provide adequate definitions
498 of the following macros. */
499 # ifndef GET_ADDR_ARGS
500 # define GET_ADDR_ARGS tls_index *ti
502 # ifndef GET_ADDR_MODULE
503 # define GET_ADDR_MODULE ti->ti_module
505 # ifndef GET_ADDR_OFFSET
506 # define GET_ADDR_OFFSET ti->ti_offset
511 allocate_and_init (struct link_map
*map
)
515 newp
= __libc_memalign (map
->l_tls_align
, map
->l_tls_blocksize
);
519 /* Initialize the memory. */
520 memset (__mempcpy (newp
, map
->l_tls_initimage
, map
->l_tls_initimage_size
),
521 '\0', map
->l_tls_blocksize
- map
->l_tls_initimage_size
);
527 /* The generic dynamic and local dynamic model cannot be used in
528 statically linked applications. */
530 __tls_get_addr (GET_ADDR_ARGS
)
532 dtv_t
*dtv
= THREAD_DTV ();
533 struct link_map
*the_map
= NULL
;
536 if (__builtin_expect (dtv
[0].counter
!= GL(dl_tls_generation
), 0))
538 struct dtv_slotinfo_list
*listp
;
541 /* The global dl_tls_dtv_slotinfo array contains for each module
542 index the generation counter current when the entry was
543 created. This array never shrinks so that all module indices
544 which were valid at some time can be used to access it.
545 Before the first use of a new module index in this function
546 the array was extended appropriately. Access also does not
547 have to be guarded against modifications of the array. It is
548 assumed that pointer-size values can be read atomically even
549 in SMP environments. It is possible that other threads at
550 the same time dynamically load code and therefore add to the
551 slotinfo list. This is a problem since we must not pick up
552 any information about incomplete work. The solution to this
553 is to ignore all dtv slots which were created after the one
554 we are currently interested. We know that dynamic loading
555 for this module is completed and this is the last load
556 operation we know finished. */
557 idx
= GET_ADDR_MODULE
;
558 listp
= GL(dl_tls_dtv_slotinfo_list
);
559 while (idx
>= listp
->len
)
565 if (dtv
[0].counter
< listp
->slotinfo
[idx
].gen
)
567 /* The generation counter for the slot is higher than what
568 the current dtv implements. We have to update the whole
569 dtv but only those entries with a generation counter <=
570 the one for the entry we need. */
571 size_t new_gen
= listp
->slotinfo
[idx
].gen
;
574 /* We have to look through the entire dtv slotinfo list. */
575 listp
= GL(dl_tls_dtv_slotinfo_list
);
580 for (cnt
= total
= 0 ? 1 : 0; cnt
< listp
->len
; ++cnt
)
582 size_t gen
= listp
->slotinfo
[cnt
].gen
;
583 struct link_map
*map
;
587 /* This is a slot for a generation younger than
588 the one we are handling now. It might be
589 incompletely set up so ignore it. */
592 /* If the entry is older than the current dtv layout
593 we know we don't have to handle it. */
594 if (gen
<= dtv
[0].counter
)
597 /* If there is no map this means the entry is empty. */
598 map
= listp
->slotinfo
[cnt
].map
;
601 /* If this modid was used at some point the memory
602 might still be allocated. */
603 if (dtv
[total
+ cnt
].pointer
!= TLS_DTV_UNALLOCATED
)
605 free (dtv
[total
+ cnt
].pointer
);
606 dtv
[total
+ cnt
].pointer
= TLS_DTV_UNALLOCATED
;
612 /* Check whether the current dtv array is large enough. */
613 modid
= map
->l_tls_modid
;
614 assert (total
+ cnt
== modid
);
615 if (dtv
[-1].counter
< modid
)
617 /* Reallocate the dtv. */
619 size_t newsize
= GL(dl_tls_max_dtv_idx
) + DTV_SURPLUS
;
620 size_t oldsize
= dtv
[-1].counter
;
622 assert (map
->l_tls_modid
<= newsize
);
624 if (dtv
== GL(dl_initial_dtv
))
626 /* This is the initial dtv that was allocated
627 during rtld startup using the dl-minimal.c
628 malloc instead of the real malloc. We can't
629 free it, we have to abandon the old storage. */
631 newp
= malloc ((2 + newsize
) * sizeof (dtv_t
));
634 memcpy (newp
, &dtv
[-1], oldsize
* sizeof (dtv_t
));
638 newp
= realloc (&dtv
[-1],
639 (2 + newsize
) * sizeof (dtv_t
));
644 newp
[0].counter
= newsize
;
646 /* Clear the newly allocated part. */
647 memset (newp
+ 2 + oldsize
, '\0',
648 (newsize
- oldsize
) * sizeof (dtv_t
));
650 /* Point dtv to the generation counter. */
653 /* Install this new dtv in the thread data
655 INSTALL_NEW_DTV (dtv
);
658 /* If there is currently memory allocated for this
659 dtv entry, free it. */
660 /* XXX Ideally we will at some point create a memory
662 if (dtv
[modid
].pointer
!= TLS_DTV_UNALLOCATED
)
663 /* Note that free is called for NULL as well. We
664 deallocate even if it is this dtv entry we are
665 supposed to load. The reason is that we call
666 memalign and not malloc. */
667 free (dtv
[modid
].pointer
);
669 /* This module is loaded dynamically. We defer
670 memory allocation. */
671 dtv
[modid
].pointer
= TLS_DTV_UNALLOCATED
;
673 if (modid
== GET_ADDR_MODULE
)
679 while ((listp
= listp
->next
) != NULL
);
681 /* This will be the new maximum generation counter. */
682 dtv
[0].counter
= new_gen
;
686 p
= dtv
[GET_ADDR_MODULE
].pointer
;
688 if (__builtin_expect (p
== TLS_DTV_UNALLOCATED
, 0))
690 /* The allocation was deferred. Do it now. */
693 /* Find the link map for this module. */
694 size_t idx
= GET_ADDR_MODULE
;
695 struct dtv_slotinfo_list
*listp
= GL(dl_tls_dtv_slotinfo_list
);
697 while (idx
>= listp
->len
)
703 the_map
= listp
->slotinfo
[idx
].map
;
706 p
= dtv
[GET_ADDR_MODULE
].pointer
= allocate_and_init (the_map
);
709 return (char *) p
+ GET_ADDR_OFFSET
;