/* Thread-local storage handling in the ELF dynamic linker.  Generic version.
   Copyright (C) 2002-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
#include <assert.h>
#include <errno.h>
#include <libintl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>
#include <atomic.h>

#include <tls.h>
#include <dl-tls.h>
#include <ldsodefs.h>

#if PTHREAD_IN_LIBC
# include <list.h>
#endif

#define TUNABLE_NAMESPACE rtld
#include <dl-tunables.h>
/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for

   - IE TLS in libc.so for all dlmopen namespaces except in the initial
     one where libc.so is not loaded dynamically but at startup time,
   - IE TLS in other libraries which may be dynamically loaded even in the
     initial namespace,
   - and optionally for optimizing dynamic TLS access.

   The maximum number of namespaces is DL_NNS, but to support that many
   namespaces correctly the static TLS allocation should be significantly
   increased, which may cause problems with small thread stacks due to the
   way static TLS is accounted (bug 11787).

   So there is a rtld.nns tunable limit on the number of supported namespaces
   that affects the size of the static TLS and by default it's small enough
   not to cause problems with existing applications.  The limit is not
   enforced or checked: it is the user's responsibility to increase rtld.nns
   if more dlmopen namespaces are used.

   Audit modules use their own namespaces; they are not included in rtld.nns,
   but come on top when computing the number of namespaces.  */
/* Size of initial-exec TLS in libc.so.  This should be the maximum of
   observed PT_TLS sizes across all architectures.  Some architectures
   have lower values due to differences in type sizes and link editor
   capabilities.  */
#define LIBC_IE_TLS 144
/* Size of initial-exec TLS in libraries other than libc.so.
   This should be large enough to cover runtime libraries of the
   compiler such as libgomp and libraries in glibc other than libc.so.  */
#define OTHER_IE_TLS 144

/* Default number of namespaces.  */
#define DEFAULT_NNS 4

/* Default for dl_tls_static_optional.  */
#define OPTIONAL_TLS 512
/* Compute the static TLS surplus based on the namespace count and the
   TLS space that can be used for optimizations.  */
static inline int
tls_static_surplus (int nns, int opt_tls)
{
  return (nns - 1) * LIBC_IE_TLS + nns * OTHER_IE_TLS + opt_tls;
}
/* This value is chosen so that with default values for the tunables,
   the computation of dl_tls_static_surplus in
   _dl_tls_static_surplus_init yields the historic value 1664, for
   backwards compatibility.  */
#define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
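
/* Worked example with the defaults above (a sketch, assuming none of
   the constants are overridden): tls_static_surplus (DEFAULT_NNS,
   OPTIONAL_TLS) = (4 - 1) * 144 + 4 * 144 + 512 = 1520, so LEGACY_TLS
   = 1664 - 1520 = 144 and the default surplus comes out at the
   historic 1664 bytes.  */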
/* Calculate the size of the static TLS surplus, when the given
   number of audit modules are loaded.  Must be called after the
   number of audit modules is known and before static TLS allocation.  */
void
_dl_tls_static_surplus_init (size_t naudit)
{
  size_t nns, opt_tls;

#if HAVE_TUNABLES
  nns = TUNABLE_GET (nns, size_t, NULL);
  opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
#else
  /* Default values of the tunables.  */
  nns = DEFAULT_NNS;
  opt_tls = OPTIONAL_TLS;
#endif
  if (nns > DL_NNS)
    nns = DL_NNS;
  if (DL_NNS - nns < naudit)
    _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
                      (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
  nns += naudit;

  GL(dl_tls_static_optional) = opt_tls;
  assert (LEGACY_TLS >= 0);
  GLRO(dl_tls_static_surplus) = tls_static_surplus (nns, opt_tls) + LEGACY_TLS;
}
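
/* Illustrative usage: both tunables read above can be set from the
   environment at program startup, e.g.

     GLIBC_TUNABLES=glibc.rtld.nns=8:glibc.rtld.optional_static_tls=1024

   to reserve surplus static TLS for eight dlmopen namespaces.  This is
   a sketch of typical usage; see the glibc tunables documentation for
   details.  */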
/* Out-of-memory handler.  */
static void
__attribute__ ((__noreturn__))
oom (void)
{
  _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
}
void
_dl_assign_tls_modid (struct link_map *l)
{
  size_t result;

  if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
    {
      size_t disp = 0;
      struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);

      /* Note that this branch will never be executed during program
         start since there are no gaps at that time.  Therefore it
         does not matter that the dl_tls_dtv_slotinfo is not allocated
         yet when the function is called for the first times.

         NB: the offset +1 is due to the fact that DTV[0] is used
         for something else.  */
      result = GL(dl_tls_static_nelem) + 1;
      if (result <= GL(dl_tls_max_dtv_idx))
        do
          {
            while (result - disp < runp->len)
              {
                if (runp->slotinfo[result - disp].map == NULL)
                  break;

                ++result;
                assert (result <= GL(dl_tls_max_dtv_idx) + 1);
              }

            if (result - disp < runp->len)
              {
                /* Mark the entry as used, so any dependency sees it.  */
                atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
                break;
              }

            disp += runp->len;
          }
        while ((runp = runp->next) != NULL);

      if (result > GL(dl_tls_max_dtv_idx))
        {
          /* The new index must indeed be exactly one higher than the
             previous high.  */
          assert (result == GL(dl_tls_max_dtv_idx) + 1);
          /* There is no gap anymore.  */
          GL(dl_tls_dtv_gaps) = false;

          goto nogaps;
        }
    }
  else
    {
      /* No gaps, allocate a new entry.  */
    nogaps:

      result = GL(dl_tls_max_dtv_idx) + 1;
      /* Can be read concurrently.  */
      atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
    }

  l->l_tls_modid = result;
}
size_t
_dl_count_modids (void)
{
  /* The count is the max unless dlclose or failed dlopen created gaps.  */
  if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
    return GL(dl_tls_max_dtv_idx);

  /* We have gaps and are forced to count the non-NULL entries.  */
  size_t n = 0;
  struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
  while (runp != NULL)
    {
      for (size_t i = 0; i < runp->len; ++i)
        if (runp->slotinfo[i].map != NULL)
          ++n;

      runp = runp->next;
    }

  return n;
}
void
_dl_determine_tlsoffset (void)
{
  size_t max_align = TCB_ALIGNMENT;
  size_t freetop = 0;
  size_t freebottom = 0;

  /* The first element of the dtv slot info list is allocated.  */
  assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
  /* There is at this point only one element in the
     dl_tls_dtv_slotinfo_list list.  */
  assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);

  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;

  /* Determining the offset of the various parts of the static TLS
     block has several dependencies.  In addition we have to work
     around bugs in some toolchains.

     Each TLS block from the objects available at link time has a size
     and an alignment requirement.  The GNU ld computes the alignment
     requirements for the data at the positions *in the file*, though.
     I.e., it is not simply possible to allocate a block with the size
     of the TLS program header entry.  The data is laid out assuming
     that the first byte of the TLS block fulfills

       p_vaddr mod p_align == &TLS_BLOCK mod p_align

     This means we have to add artificial padding at the beginning of
     the TLS block.  These bytes are never used for the TLS data in
     this module but the first byte allocated must be aligned
     according to mod p_align == 0 so that the first byte of the TLS
     block is aligned according to p_vaddr mod p_align.  This is ugly
     and the linker can help by computing the offsets in the TLS block
     assuming the first byte of the TLS block is aligned according to
     p_align.

     The extra space which might be allocated before the first byte of
     the TLS block need not go unused.  The code below tries to use
     that memory for the next TLS block.  This can work if the total
     memory requirement for the next TLS block is smaller than the
     gap.  */
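
  /* Worked example of the firstbyte computation used below, for a
     hypothetical module: with p_align == 16 and p_vaddr % p_align == 8,
     firstbyte = (-8) & (16 - 1) == 8, i.e. up to 8 padding bytes may
     precede the block so that its first byte stays congruent to
     p_vaddr modulo p_align.  */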
#if TLS_TCB_AT_TP
  /* We simply start with zero.  */
  size_t offset = 0;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
        {
          off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
                         - firstbyte, slotinfo[cnt].map->l_tls_align)
                + firstbyte;
          if (off <= freebottom)
            {
              freetop = off;

              /* XXX For some architectures we perhaps should store the
                 negative offset.  */
              slotinfo[cnt].map->l_tls_offset = off;
              continue;
            }
        }

      off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
                     slotinfo[cnt].map->l_tls_align) + firstbyte;
      if (off > offset + slotinfo[cnt].map->l_tls_blocksize
                + (freebottom - freetop))
        {
          freetop = offset;
          freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
        }
      offset = off;

      /* XXX For some architectures we perhaps should store the
         negative offset.  */
      slotinfo[cnt].map->l_tls_offset = off;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
                                        max_align)
                               + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* The TLS blocks start right after the TCB.  */
  size_t offset = TLS_TCB_SIZE;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
        {
          off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
          if (off - freebottom < firstbyte)
            off += slotinfo[cnt].map->l_tls_align;
          if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
            {
              slotinfo[cnt].map->l_tls_offset = off - firstbyte;
              freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
                            - firstbyte);
              continue;
            }
        }

      off = roundup (offset, slotinfo[cnt].map->l_tls_align);
      if (off - offset < firstbyte)
        off += slotinfo[cnt].map->l_tls_align;

      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
      if (off - firstbyte - offset > freetop - freebottom)
        {
          freebottom = offset;
          freetop = off - firstbyte;
        }

      offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
                                       TCB_ALIGNMENT);
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

  /* The alignment requirement for the static TLS block.  */
  GLRO (dl_tls_static_align) = max_align;
}
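
/* The two layouts handled above correspond to the two ELF TLS variants:
   TLS_TCB_AT_TP (e.g. x86_64), where the blocks live at negative offsets
   below the thread pointer, and TLS_DTV_AT_TP (e.g. AArch64), where the
   blocks follow the TCB at positive offsets.  */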
static void *
allocate_dtv (void *result)
{
  dtv_t *dtv;
  size_t dtv_length;

  /* Relaxed MO, because the dtv size is later rechecked, not relied on.  */
  size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
  /* We allocate a few more elements in the dtv than are needed for the
     initial set of modules.  This should avoid in most cases expansions
     of the dtv.  */
  dtv_length = max_modid + DTV_SURPLUS;
  dtv = calloc (dtv_length + 2, sizeof (dtv_t));
  if (dtv != NULL)
    {
      /* This is the initial length of the dtv.  */
      dtv[0].counter = dtv_length;

      /* The rest of the dtv (including the generation counter) is
         initialized with zero to indicate nothing there.  */

      /* Add the dtv to the thread data structures.  */
      INSTALL_DTV (result, dtv);
    }
  else
    result = NULL;

  return result;
}
/* Get size and alignment requirements of the static TLS block.  This
   function is no longer used by glibc itself, but the GCC sanitizers
   use it despite the GLIBC_PRIVATE status.  */
void
_dl_get_tls_static_info (size_t *sizep, size_t *alignp)
{
  *sizep = GLRO (dl_tls_static_size);
  *alignp = GLRO (dl_tls_static_align);
}
/* Derive the location of the pointer to the start of the original
   allocation (before alignment) from the pointer to the TCB.  */
static inline void **
tcb_to_pointer_to_free_location (void *tcb)
{
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, and the pointer to the front
     follows the TCB.  */
  void **original_pointer_location = tcb + TLS_TCB_SIZE;
#elif TLS_DTV_AT_TP
  /* The TCB comes first, preceded by the pre-TCB, and the pointer is
     before that.  */
  void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
#endif
  return original_pointer_location;
}
void *
_dl_allocate_tls_storage (void)
{
  void *result;
  size_t size = GLRO (dl_tls_static_size);

#if TLS_DTV_AT_TP
  /* Memory layout is:
     [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
                          ^ This should be returned.  */
  size += TLS_PRE_TCB_SIZE;
#endif

  /* Perform the allocation.  Reserve space for the required alignment
     and the pointer to the original allocation.  */
  size_t alignment = GLRO (dl_tls_static_align);
  void *allocated = malloc (size + alignment + sizeof (void *));
  if (__glibc_unlikely (allocated == NULL))
    return NULL;

  /* Perform alignment and allocate the DTV.  */
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, which determine the alignment.
     (TCB alignment requirements have been taken into account when
     calculating GLRO (dl_tls_static_align).)  */
  void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
  result = aligned + size - TLS_TCB_SIZE;

  /* Clear the TCB data structure.  We can't ask the caller (i.e.
     libpthread) to do it, because we will initialize the DTV et al.  */
  memset (result, '\0', TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* Pre-TCB and TCB come before the TLS blocks.  The layout computed
     in _dl_determine_tlsoffset assumes that the TCB is aligned to the
     TLS block alignment, and not just the TLS blocks after it.  This
     can leave an unused alignment gap between the TCB and the TLS
     blocks.  */
  result = (void *) roundup
    (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
     alignment);

  /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
     it.  We can't ask the caller (i.e. libpthread) to do it, because
     we will initialize the DTV et al.  */
  memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
#endif

  /* Record the value of the original pointer for later
     deallocation.  */
  *tcb_to_pointer_to_free_location (result) = allocated;

  result = allocate_dtv (result);
  if (result == NULL)
    free (allocated);
  return result;
}
#ifndef SHARED
extern dtv_t _dl_static_dtv[];
# define _dl_initial_dtv (&_dl_static_dtv[1])
#endif
static dtv_t *
_dl_resize_dtv (dtv_t *dtv, size_t max_modid)
{
  /* Resize the dtv.  */
  dtv_t *newp;
  size_t newsize = max_modid + DTV_SURPLUS;
  size_t oldsize = dtv[-1].counter;

  if (dtv == GL(dl_initial_dtv))
    {
      /* This is the initial dtv that was either statically allocated in
         __libc_setup_tls or allocated during rtld startup using the
         dl-minimal.c malloc instead of the real malloc.  We can't free
         it, we have to abandon the old storage.  */

      newp = malloc ((2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
      memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
    }
  else
    {
      newp = realloc (&dtv[-1],
                      (2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
    }

  newp[0].counter = newsize;

  /* Clear the newly allocated part.  */
  memset (newp + 2 + oldsize, '\0',
          (newsize - oldsize) * sizeof (dtv_t));

  /* Return the generation counter.  */
  return &newp[1];
}
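
/* Bookkeeping reminder for the callers: the logical dtv is addressed
   from element 1 of the allocation, dtv[-1].counter holds the allocated
   length and dtv[0].counter the generation of the last update, which is
   why the resized array is handed back as &newp[1].  */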
/* Allocate initial TLS.  RESULT should be a non-NULL pointer to storage
   for the TLS space.  The DTV may be resized, and so this function may
   call malloc to allocate that space.  The loader's GL(dl_load_tls_lock)
   is taken when manipulating global TLS-related data in the loader.  */
void *
_dl_allocate_tls_init (void *result, bool init_tls)
{
  if (result == NULL)
    /* The memory allocation failed.  */
    return NULL;

  dtv_t *dtv = GET_DTV (result);
  struct dtv_slotinfo_list *listp;
  size_t total = 0;
  size_t maxgen = 0;

  /* Protects global dynamic TLS related state.  */
  __rtld_lock_lock_recursive (GL(dl_load_tls_lock));

  /* Check if the current dtv is big enough.  */
  if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
    {
      /* Resize the dtv.  */
      dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));

      /* Install this new dtv in the thread data structures.  */
      INSTALL_DTV (result, &dtv[-1]);
    }

  /* We have to prepare the dtv for all currently loaded modules using
     TLS.  For those which are dynamically loaded we add the values
     indicating deferred allocation.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  while (1)
    {
      size_t cnt;

      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
        {
          struct link_map *map;
          void *dest;

          /* Check for the total number of used slots.  */
          if (total + cnt > GL(dl_tls_max_dtv_idx))
            break;

          map = listp->slotinfo[cnt].map;
          if (map == NULL)
            /* Unused entry.  */
            continue;

          /* Keep track of the maximum generation number.  This might
             not be the generation counter.  */
          assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
          maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);

          dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
          dtv[map->l_tls_modid].pointer.to_free = NULL;

          if (map->l_tls_offset == NO_TLS_OFFSET
              || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
            continue;

          assert (map->l_tls_modid == total + cnt);
          assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
#if TLS_TCB_AT_TP
          assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
          dest = (char *) result - map->l_tls_offset;
#elif TLS_DTV_AT_TP
          dest = (char *) result + map->l_tls_offset;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

          /* Set up the DTV entry.  The simplified __tls_get_addr that
             some platforms use in static programs requires it.  */
          dtv[map->l_tls_modid].pointer.val = dest;

          /* Copy the initialization image and clear the BSS part.  For
             audit modules or dependencies with initial-exec TLS, we cannot
             set the initial TLS image on default loader initialization
             because it would already be set by the audit setup.  However,
             subsequent thread creation would need to follow the default
             behaviour.  */
          if (map->l_ns != LM_ID_BASE && !init_tls)
            continue;
          memset (__mempcpy (dest, map->l_tls_initimage,
                             map->l_tls_initimage_size), '\0',
                  map->l_tls_blocksize - map->l_tls_initimage_size);
        }

      total += cnt;
      if (total > GL(dl_tls_max_dtv_idx))
        break;

      listp = listp->next;
      assert (listp != NULL);
    }
  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

  /* The DTV version is up-to-date now.  */
  dtv[0].counter = maxgen;

  return result;
}
rtld_hidden_def (_dl_allocate_tls_init)

void *
_dl_allocate_tls (void *mem)
{
  return _dl_allocate_tls_init (mem == NULL
                                ? _dl_allocate_tls_storage ()
                                : allocate_dtv (mem), true);
}
rtld_hidden_def (_dl_allocate_tls)
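
/* Sketch of the intended use (as in the pthread stack allocation code):
   _dl_allocate_tls (NULL) allocates fresh static TLS storage, while
   passing existing memory merely attaches a freshly allocated DTV to
   the caller-provided TCB.  */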
void
_dl_deallocate_tls (void *tcb, bool dealloc_tcb)
{
  dtv_t *dtv = GET_DTV (tcb);

  /* We need to free the memory allocated for non-static TLS.  */
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    free (dtv[1 + cnt].pointer.to_free);

  /* The array starts with dtv[-1].  */
  if (dtv != GL(dl_initial_dtv))
    free (dtv - 1);

  if (dealloc_tcb)
    free (*tcb_to_pointer_to_free_location (tcb));
}
rtld_hidden_def (_dl_deallocate_tls)
#ifdef SHARED
/* The __tls_get_addr function has two basic forms which differ in the
   arguments.  The IA-64 form takes two parameters, the module ID and
   offset.  The form used, among others, on IA-32 takes a reference to
   a special structure which contains the same information.  The second
   form seems to be used more often (at the moment), so we default to
   it.  Users of the IA-64 form have to provide adequate definitions
   of the following macros.  */
# ifndef GET_ADDR_ARGS
#  define GET_ADDR_ARGS tls_index *ti
#  define GET_ADDR_PARAM ti
# endif
# ifndef GET_ADDR_MODULE
#  define GET_ADDR_MODULE ti->ti_module
# endif
# ifndef GET_ADDR_OFFSET
#  define GET_ADDR_OFFSET ti->ti_offset
# endif
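
/* For illustration only, a hypothetical direct call using the default
   IA-32-style form would build the descriptor by hand:

     tls_index ti = { .ti_module = map->l_tls_modid, .ti_offset = 0 };
     void *base = __tls_get_addr (&ti);

   Real call sites are emitted by the compiler for general-dynamic TLS
   accesses, with the pair loaded from the GOT (and ti_offset biased by
   TLS_DTV_OFFSET on some architectures).  */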
/* Allocate one DTV entry.  */
static struct dtv_pointer
allocate_dtv_entry (size_t alignment, size_t size)
{
  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
    {
      /* The alignment is supported by malloc.  */
      void *ptr = malloc (size);
      return (struct dtv_pointer) { ptr, ptr };
    }

  /* Emulate memalign by manually aligning a pointer returned by
     malloc.  First compute the size with an overflow check.  */
  size_t alloc_size = size + alignment;
  if (alloc_size < size)
    return (struct dtv_pointer) {};

  /* Perform the allocation.  This is the pointer we need to free
     later.  */
  void *start = malloc (alloc_size);
  if (start == NULL)
    return (struct dtv_pointer) {};

  /* Find the aligned position within the larger allocation.  */
  void *aligned = (void *) roundup ((uintptr_t) start, alignment);

  return (struct dtv_pointer) { .val = aligned, .to_free = start };
}
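
/* Example on a typical 64-bit target where _Alignof (max_align_t) is 16:
   a request with size == 32 and alignment == 64 takes the fallback path,
   allocates 96 bytes and rounds the start up to the next 64-byte
   boundary; .to_free keeps the original pointer for the later free.  */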
static struct dtv_pointer
allocate_and_init (struct link_map *map)
{
  struct dtv_pointer result = allocate_dtv_entry
    (map->l_tls_align, map->l_tls_blocksize);
  if (result.val == NULL)
    oom ();

  /* Initialize the memory.  */
  memset (__mempcpy (result.val, map->l_tls_initimage,
                     map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);

  return result;
}
struct link_map *
_dl_update_slotinfo (unsigned long int req_modid)
{
  struct link_map *the_map = NULL;
  dtv_t *dtv = THREAD_DTV ();

  /* The global dl_tls_dtv_slotinfo array contains for each module
     index the generation counter current when the entry was created.
     This array never shrinks so that all module indices which were
     valid at some time can be used to access it.  Before the first
     use of a new module index in this function the array was extended
     appropriately.  Access also does not have to be guarded against
     modifications of the array.  It is assumed that pointer-size
     values can be read atomically even in SMP environments.  It is
     possible that other threads at the same time dynamically load
     code and therefore add to the slotinfo list.  This is a problem
     since we must not pick up any information about incomplete work.
     The solution to this is to ignore all dtv slots which were
     created after the one we are currently interested in.  We know that
     dynamic loading for this module is completed and this is the last
     load operation we know finished.  */
  unsigned long int idx = req_modid;
  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

  while (idx >= listp->len)
    {
      idx -= listp->len;
      listp = listp->next;
    }

  if (dtv[0].counter < listp->slotinfo[idx].gen)
    {
      /* CONCURRENCY NOTES:

         Here the dtv needs to be updated to new_gen generation count.

         This code may be called during TLS access when GL(dl_load_tls_lock)
         is not held.  In that case the user code has to synchronize with
         dlopen and dlclose calls of relevant modules.  A module m is
         relevant if the generation of m <= new_gen and dlclose of m is
         synchronized: a memory access here happens after the dlopen and
         before the dlclose of relevant modules.  The dtv entries for
         relevant modules need to be updated, other entries can be
         arbitrary.

         This e.g. means that the first part of the slotinfo list can be
         accessed race free, but the tail may be concurrently extended.
         Similarly relevant slotinfo entries can be read race free, but
         other entries are racy.  However updating a non-relevant dtv
         entry does not affect correctness.  For a relevant module m,
         max_modid >= modid of m.  */
      size_t new_gen = listp->slotinfo[idx].gen;
      size_t total = 0;
      size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
      assert (max_modid >= req_modid);

      /* We have to look through the entire dtv slotinfo list.  */
      listp = GL(dl_tls_dtv_slotinfo_list);
      do
        {
          for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
            {
              size_t modid = total + cnt;

              /* Later entries are not relevant.  */
              if (modid > max_modid)
                break;

              size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);

              if (gen > new_gen)
                /* Not relevant.  */
                continue;

              /* If the entry is older than the current dtv layout we
                 know we don't have to handle it.  */
              if (gen <= dtv[0].counter)
                continue;

              /* If there is no map this means the entry is empty.  */
              struct link_map *map
                = atomic_load_relaxed (&listp->slotinfo[cnt].map);
              /* Check whether the current dtv array is large enough.  */
              if (dtv[-1].counter < modid)
                {
                  if (map == NULL)
                    continue;

                  /* Resize the dtv.  */
                  dtv = _dl_resize_dtv (dtv, max_modid);

                  assert (modid <= dtv[-1].counter);

                  /* Install this new dtv in the thread data
                     structures.  */
                  INSTALL_NEW_DTV (dtv);
                }

              /* If there is currently memory allocated for this
                 dtv entry, free it.  */
              /* XXX Ideally we will at some point create a memory
                 pool.  */
              free (dtv[modid].pointer.to_free);
              dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
              dtv[modid].pointer.to_free = NULL;

              if (modid == req_modid)
                the_map = map;
            }

          total += listp->len;
          if (total > max_modid)
            break;

          /* Synchronize with _dl_add_to_slotinfo.  Ideally this would
             be consume MO since we only need to order the accesses to
             the next node after the read of the address and on most
             hardware (other than alpha) a normal load would do that
             because of the address dependency.  */
          listp = atomic_load_acquire (&listp->next);
        }
      while (listp != NULL);

      /* This will be the new maximum generation counter.  */
      dtv[0].counter = new_gen;
    }

  return the_map;
}
static void *
__attribute_noinline__
tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
{
  /* The allocation was deferred.  Do it now.  */
  if (the_map == NULL)
    {
      /* Find the link map for this module.  */
      size_t idx = GET_ADDR_MODULE;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      the_map = listp->slotinfo[idx].map;
    }

  /* Make sure that, if a dlopen running in parallel forces the
     variable into static storage, we'll wait until the address in the
     static TLS block is set up, and use that.  If we're undecided
     yet, make sure we make the decision holding the lock as well.  */
  if (__glibc_unlikely (the_map->l_tls_offset
                        != FORCED_DYNAMIC_TLS_OFFSET))
    {
      __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
      if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
        {
          the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
          __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
        }
      else if (__glibc_likely (the_map->l_tls_offset
                               != FORCED_DYNAMIC_TLS_OFFSET))
        {
#if TLS_TCB_AT_TP
          void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
#elif TLS_DTV_AT_TP
          void *p = (char *) THREAD_SELF + the_map->l_tls_offset
                    + TLS_PRE_TCB_SIZE;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
          __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

          dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
          dtv[GET_ADDR_MODULE].pointer.val = p;

          return (char *) p + GET_ADDR_OFFSET;
        }
      else
        __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
    }
  struct dtv_pointer result = allocate_and_init (the_map);
  dtv[GET_ADDR_MODULE].pointer = result;
  assert (result.to_free != NULL);

  return (char *) result.val + GET_ADDR_OFFSET;
}
static struct link_map *
__attribute_noinline__
update_get_addr (GET_ADDR_ARGS)
{
  struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
  dtv_t *dtv = THREAD_DTV ();

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);

  return (void *) p + GET_ADDR_OFFSET;
}
/* For all machines that have a non-macro version of __tls_get_addr, we
   want to use rtld_hidden_proto/rtld_hidden_def in order to call the
   internal alias for __tls_get_addr from ld.so.  This avoids a PLT entry
   in ld.so for __tls_get_addr.  */

#ifndef __tls_get_addr
extern void * __tls_get_addr (GET_ADDR_ARGS);
rtld_hidden_proto (__tls_get_addr)
rtld_hidden_def (__tls_get_addr)
#endif

/* The generic dynamic and local dynamic model cannot be used in
   statically linked applications.  */
void *
__tls_get_addr (GET_ADDR_ARGS)
{
  dtv_t *dtv = THREAD_DTV ();

  /* Update is needed if dtv[0].counter < the generation of the accessed
     module.  The global generation counter is used here as it is easier
     to check.  Synchronization for the relaxed MO access is guaranteed
     by user code, see CONCURRENCY NOTES in _dl_update_slotinfo.  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    return update_get_addr (GET_ADDR_PARAM);

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);

  return (char *) p + GET_ADDR_OFFSET;
}
#endif /* SHARED */
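
/* The fast path above is just two loads and a compare.  For example, a
   general-dynamic access to

     __thread int counter;

   from a shared object compiles to a call to this entry point with the
   module/offset pair loaded from the GOT; update_get_addr and
   tls_get_addr_tail are reached only when the DTV is stale or the
   module's block has not been allocated for this thread yet.  */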
/* Look up the module's TLS block as for __tls_get_addr,
   but never touch anything.  Return null if it's not allocated yet.  */
void *
_dl_tls_get_addr_soft (struct link_map *l)
{
  if (__glibc_unlikely (l->l_tls_modid == 0))
    /* This module has no TLS segment.  */
    return NULL;

  dtv_t *dtv = THREAD_DTV ();
  /* This may be called without holding the GL(dl_load_tls_lock).  Reading
     arbitrary gen value is fine since this is best effort code.  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    {
      /* This thread's DTV is not completely current,
         but it might already cover this module.  */

      if (l->l_tls_modid >= dtv[-1].counter)
        /* Nope.  */
        return NULL;

      size_t idx = l->l_tls_modid;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      /* We've reached the slot for this module.
         If its generation counter is higher than the DTV's,
         this thread does not know about this module yet.  */
      if (dtv[0].counter < listp->slotinfo[idx].gen)
        return NULL;
    }

  void *data = dtv[l->l_tls_modid].pointer.val;
  if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
    /* The DTV is current, but this thread has not yet needed
       to allocate this module's segment.  */
    data = NULL;

  return data;
}
void
_dl_add_to_slotinfo (struct link_map *l, bool do_add)
{
  /* Now that we know the object is loaded successfully add
     modules containing TLS data to the dtv info table.  We
     might have to increase its size.  */
  struct dtv_slotinfo_list *listp;
  struct dtv_slotinfo_list *prevp;
  size_t idx = l->l_tls_modid;

  /* Find the place in the dtv slotinfo list.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  prevp = NULL;                /* Needed to shut up gcc.  */
  do
    {
      /* Does it fit in the array of this list element?  */
      if (idx < listp->len)
        break;
      idx -= listp->len;
      prevp = listp;
      listp = listp->next;
    }
  while (listp != NULL);

  if (listp == NULL)
    {
      /* When we come here it means we have to add a new element
         to the slotinfo list.  And the new module must be in
         the first slot.  */
      assert (idx == 0);

      listp = (struct dtv_slotinfo_list *)
        malloc (sizeof (struct dtv_slotinfo_list)
                + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
      if (listp == NULL)
        {
          /* We ran out of memory while resizing the dtv slotinfo list.  */
          _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
cannot create TLS data structures"));
        }

      listp->len = TLS_SLOTINFO_SURPLUS;
      listp->next = NULL;
      memset (listp->slotinfo, '\0',
              TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
      /* Synchronize with _dl_update_slotinfo.  */
      atomic_store_release (&prevp->next, listp);
    }

  /* Add the information into the slotinfo data structure.  */
  if (do_add)
    {
      /* Can be read concurrently.  See _dl_update_slotinfo.  */
      atomic_store_relaxed (&listp->slotinfo[idx].map, l);
      atomic_store_relaxed (&listp->slotinfo[idx].gen,
                            GL(dl_tls_generation) + 1);
    }
}
#if PTHREAD_IN_LIBC
static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* Initialize the memory.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

void
_dl_init_static_tls (struct link_map *map)
{
  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &GL (dl_stack_used))
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &GL (dl_stack_user))
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
}
#endif /* PTHREAD_IN_LIBC */