elf/dl-tls.c
1 /* Thread-local storage handling in the ELF dynamic linker. Generic version.
2 Copyright (C) 2002-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <libintl.h>
22 #include <signal.h>
23 #include <stdlib.h>
24 #include <unistd.h>
25 #include <sys/param.h>
26 #include <atomic.h>
28 #include <tls.h>
29 #include <dl-tls.h>
30 #include <ldsodefs.h>
32 #if PTHREAD_IN_LIBC
33 # include <list.h>
34 #endif
36 #define TUNABLE_NAMESPACE rtld
37 #include <dl-tunables.h>
39 /* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
41 - IE TLS in libc.so for all dlmopen namespaces except in the initial
42 one where libc.so is not loaded dynamically but at startup time,
43 - IE TLS in other libraries which may be dynamically loaded even in the
44 initial namespace,
45 - and optionally for optimizing dynamic TLS access.
47 The maximum number of namespaces is DL_NNS, but to support that many
48 namespaces correctly the static TLS allocation should be significantly
49 increased, which may cause problems with small thread stacks due to the
50 way static TLS is accounted (bug 11787).
52 So there is an rtld.nns tunable limit on the number of supported namespaces
53 that affects the size of the static TLS; by default it is small enough
54 not to cause problems with existing applications. The limit is not
55 enforced or checked: it is the user's responsibility to increase rtld.nns
56 if more dlmopen namespaces are used.
58 Audit modules use their own namespaces; they are not included in rtld.nns,
59 but come on top when computing the number of namespaces. */
61 /* Size of initial-exec TLS in libc.so. This should be the maximum of
62 observed PT_TLS segment sizes across all architectures. Some
63 architectures have lower values due to differences in type sizes
64 and link editor capabilities. */
65 #define LIBC_IE_TLS 144
67 /* Size of initial-exec TLS in libraries other than libc.so.
68 This should be large enough to cover runtime libraries of the
69 compiler such as libgomp and libraries in libc other than libc.so. */
70 #define OTHER_IE_TLS 144
72 /* Default number of namespaces. */
73 #define DEFAULT_NNS 4
75 /* Default for dl_tls_static_optional. */
76 #define OPTIONAL_TLS 512
78 /* Compute the static TLS surplus based on the namespace count and the
79 TLS space that can be used for optimizations. */
80 static inline int
81 tls_static_surplus (int nns, int opt_tls)
83 return (nns - 1) * LIBC_IE_TLS + nns * OTHER_IE_TLS + opt_tls;
86 /* This value is chosen so that with default values for the tunables,
87 the computation of dl_tls_static_surplus in
88 _dl_tls_static_surplus_init yields the historic value 1664, for
89 backwards compatibility. */
90 #define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
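As a cross-check of the LEGACY_TLS definition (illustration only, using the default
constants defined above):

  tls_static_surplus (4, 512) = (4 - 1) * 144 + 4 * 144 + 512 = 1520
  LEGACY_TLS                  = 1664 - 1520                   = 144
  surplus, no audit modules   = 1520 + 144                    = 1664  (the historic value)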
92 /* Calculate the size of the static TLS surplus, when the given
93 number of audit modules are loaded. Must be called after the
94 number of audit modules is known and before static TLS allocation. */
95 void
96 _dl_tls_static_surplus_init (size_t naudit)
98 size_t nns, opt_tls;
100 nns = TUNABLE_GET (nns, size_t, NULL);
101 opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
102 if (nns > DL_NNS)
103 nns = DL_NNS;
104 if (DL_NNS - nns < naudit)
105 _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
106 (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
107 nns += naudit;
109 GL(dl_tls_static_optional) = opt_tls;
110 assert (LEGACY_TLS >= 0);
111 GLRO(dl_tls_static_surplus) = tls_static_surplus (nns, opt_tls) + LEGACY_TLS;
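Both inputs come from the rtld tunable namespace selected above, so an application
that needs more dlmopen namespaces or more optional static TLS can raise them at
startup, for example with GLIBC_TUNABLES=glibc.rtld.nns=8:glibc.rtld.optional_static_tls=1024
in the environment (check the installed glibc's tunables documentation for the exact
spelling and limits).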
114 /* Out-of-memory handler. */
115 static void
116 __attribute__ ((__noreturn__))
117 oom (void)
119 _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
123 void
124 _dl_assign_tls_modid (struct link_map *l)
126 size_t result;
128 if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
130 size_t disp = 0;
131 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
133 /* Note that this branch will never be executed during program
134 start since there are no gaps at that time. Therefore it
135 does not matter that the dl_tls_dtv_slotinfo_list is not allocated
136 yet when the function is called for the first time.
138 NB: the offset +1 is due to the fact that DTV[0] is used
139 for something else. */
140 result = GL(dl_tls_static_nelem) + 1;
141 if (result <= GL(dl_tls_max_dtv_idx))
144 while (result - disp < runp->len)
146 if (runp->slotinfo[result - disp].map == NULL)
147 break;
149 ++result;
150 assert (result <= GL(dl_tls_max_dtv_idx) + 1);
153 if (result - disp < runp->len)
155 /* Mark the entry as used, so any dependency sees it. */
156 atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
157 break;
160 disp += runp->len;
162 while ((runp = runp->next) != NULL);
164 if (result > GL(dl_tls_max_dtv_idx))
166 /* The new index must indeed be exactly one higher than the
167 previous high. */
168 assert (result == GL(dl_tls_max_dtv_idx) + 1);
169 /* There is no gap anymore. */
170 GL(dl_tls_dtv_gaps) = false;
172 goto nogaps;
175 else
177 /* No gaps, allocate a new entry. */
178 nogaps:
180 result = GL(dl_tls_max_dtv_idx) + 1;
181 /* Can be read concurrently. */
182 atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
185 l->l_tls_modid = result;
189 size_t
190 _dl_count_modids (void)
192 /* The count is the max unless dlclose or failed dlopen created gaps. */
193 if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
194 return GL(dl_tls_max_dtv_idx);
196 /* We have gaps and are forced to count the non-NULL entries. */
197 size_t n = 0;
198 struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
199 while (runp != NULL)
201 for (size_t i = 0; i < runp->len; ++i)
202 if (runp->slotinfo[i].map != NULL)
203 ++n;
205 runp = runp->next;
208 return n;
212 #ifdef SHARED
213 void
214 _dl_determine_tlsoffset (void)
216 size_t max_align = TCB_ALIGNMENT;
217 size_t freetop = 0;
218 size_t freebottom = 0;
220 /* The first element of the dtv slot info list is allocated. */
221 assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
222 /* There is at this point only one element in the
223 dl_tls_dtv_slotinfo_list list. */
224 assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
226 struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
228 /* Determining the offset of the various parts of the static TLS
229 block has several dependencies. In addition we have to work
230 around bugs in some toolchains.
232 Each TLS block from the objects available at link time has a size
233 and an alignment requirement. The GNU ld computes the alignment
234 requirements for the data at the positions *in the file*, though.
235 I.e., it is not simply possible to allocate a block with the size
236 of the TLS program header entry. The data is laid out assuming
237 that the first byte of the TLS block fulfills
239 p_vaddr mod p_align == &TLS_BLOCK mod p_align
241 This means we have to add artificial padding at the beginning of
242 the TLS block. These bytes are never used for the TLS data in
243 this module but the first byte allocated must be aligned
244 according to mod p_align == 0 so that the first byte of the TLS
245 block is aligned according to p_vaddr mod p_align. This is ugly
246 and the linker can help by computing the offsets in the TLS block
247 assuming the first byte of the TLS block is aligned according to
248 p_align.
250 The extra space which might be allocated before the first byte of
251 the TLS block need not go unused. The code below tries to use
252 that memory for the next TLS block. This can work if the total
253 memory requirement for the next TLS block is smaller than the
254 gap. */
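A small worked example of the invariant above (not part of the original source):
suppose a module's PT_TLS segment has p_align == 16 and p_vaddr % p_align == 12, so
l_tls_firstbyte_offset == 12. The padding computed below is

  firstbyte = -l_tls_firstbyte_offset & (l_tls_align - 1)
            = -12 & 15
            = 4

i.e. the first byte of the module's block is placed 4 bytes before a 16-byte
boundary, which yields &TLS_BLOCK % 16 == 12 == p_vaddr % 16 as required.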
256 #if TLS_TCB_AT_TP
257 /* We simply start with zero. */
258 size_t offset = 0;
260 for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
262 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
264 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
265 & (slotinfo[cnt].map->l_tls_align - 1));
266 size_t off;
267 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
269 if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
271 off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
272 - firstbyte, slotinfo[cnt].map->l_tls_align)
273 + firstbyte;
274 if (off <= freebottom)
276 freetop = off;
278 /* XXX For some architectures we perhaps should store the
279 negative offset. */
280 slotinfo[cnt].map->l_tls_offset = off;
281 continue;
285 off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
286 slotinfo[cnt].map->l_tls_align) + firstbyte;
287 if (off > offset + slotinfo[cnt].map->l_tls_blocksize
288 + (freebottom - freetop))
290 freetop = offset;
291 freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
293 offset = off;
295 /* XXX For some architectures we perhaps should store the
296 negative offset. */
297 slotinfo[cnt].map->l_tls_offset = off;
300 GL(dl_tls_static_used) = offset;
301 GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
302 max_align)
303 + TLS_TCB_SIZE);
304 #elif TLS_DTV_AT_TP
305 /* The TLS blocks start right after the TCB. */
306 size_t offset = TLS_TCB_SIZE;
308 for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
310 assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
312 size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
313 & (slotinfo[cnt].map->l_tls_align - 1));
314 size_t off;
315 max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
317 if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
319 off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
320 if (off - freebottom < firstbyte)
321 off += slotinfo[cnt].map->l_tls_align;
322 if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
324 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
325 freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
326 - firstbyte);
327 continue;
331 off = roundup (offset, slotinfo[cnt].map->l_tls_align);
332 if (off - offset < firstbyte)
333 off += slotinfo[cnt].map->l_tls_align;
335 slotinfo[cnt].map->l_tls_offset = off - firstbyte;
336 if (off - firstbyte - offset > freetop - freebottom)
338 freebottom = offset;
339 freetop = off - firstbyte;
342 offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
345 GL(dl_tls_static_used) = offset;
346 GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
347 TCB_ALIGNMENT);
348 #else
349 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
350 #endif
352 /* The alignment requirement for the static TLS block. */
353 GLRO (dl_tls_static_align) = max_align;
355 #endif /* SHARED */
357 static void *
358 allocate_dtv (void *result)
360 dtv_t *dtv;
361 size_t dtv_length;
363 /* Relaxed MO, because the dtv size is later rechecked, not relied on. */
364 size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
365 /* We allocate a few more elements in the dtv than are needed for the
366 initial set of modules. This should, in most cases, avoid
367 expansions of the dtv.
368 dtv_length = max_modid + DTV_SURPLUS;
369 dtv = calloc (dtv_length + 2, sizeof (dtv_t));
370 if (dtv != NULL)
372 /* This is the initial length of the dtv. */
373 dtv[0].counter = dtv_length;
375 /* The rest of the dtv (including the generation counter) is
376 initialized with zero to indicate nothing is there. */
378 /* Add the dtv to the thread data structures. */
379 INSTALL_DTV (result, dtv);
381 else
382 result = NULL;
384 return result;
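For reference, an editorial sketch of the allocation made above, assuming the usual
INSTALL_DTV convention of installing dtv + 1 so that later code addresses the array
from index -1:

  allocation index:    0            1               2                    N+1
                   [ capacity ][ generation ][ modid 1 entry ] ... [ modid N entry ]
                                ^-- installed DTV pointer (dtv) points here

so dtv[-1].counter holds the capacity written as dtv[0].counter in this function,
dtv[0].counter holds the generation, and dtv[modid] holds each module's entry,
matching the dtv[-1]/dtv[0] accesses elsewhere in this file.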
387 /* Get size and alignment requirements of the static TLS block. This
388 function is no longer used by glibc itself, but the GCC sanitizers
389 use it despite the GLIBC_PRIVATE status. */
390 void
391 _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
393 *sizep = GLRO (dl_tls_static_size);
394 *alignp = GLRO (dl_tls_static_align);
397 /* Derive the location of the pointer to the start of the original
398 allocation (before alignment) from the pointer to the TCB. */
399 static inline void **
400 tcb_to_pointer_to_free_location (void *tcb)
402 #if TLS_TCB_AT_TP
403 /* The TCB follows the TLS blocks, and the pointer to the front
404 follows the TCB. */
405 void **original_pointer_location = tcb + TLS_TCB_SIZE;
406 #elif TLS_DTV_AT_TP
407 /* The TCB comes first, preceded by the pre-TCB, and the pointer is
408 before that. */
409 void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
410 #endif
411 return original_pointer_location;
414 void *
415 _dl_allocate_tls_storage (void)
417 void *result;
418 size_t size = GLRO (dl_tls_static_size);
420 #if TLS_DTV_AT_TP
421 /* Memory layout is:
422 [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
423 ^ This should be returned. */
424 size += TLS_PRE_TCB_SIZE;
425 #endif
427 /* Perform the allocation. Reserve space for the required alignment
428 and the pointer to the original allocation. */
429 size_t alignment = GLRO (dl_tls_static_align);
430 void *allocated = malloc (size + alignment + sizeof (void *));
431 if (__glibc_unlikely (allocated == NULL))
432 return NULL;
434 /* Perform alignment and allocate the DTV. */
435 #if TLS_TCB_AT_TP
436 /* The TCB follows the TLS blocks, which determine the alignment.
437 (TCB alignment requirements have been taken into account when
438 calculating GLRO (dl_tls_static_align).) */
439 void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
440 result = aligned + size - TLS_TCB_SIZE;
442 /* Clear the TCB data structure. We can't ask the caller (i.e.
443 libpthread) to do it, because we will initialize the DTV et al. */
444 memset (result, '\0', TLS_TCB_SIZE);
445 #elif TLS_DTV_AT_TP
446 /* Pre-TCB and TCB come before the TLS blocks. The layout computed
447 in _dl_determine_tlsoffset assumes that the TCB is aligned to the
448 TLS block alignment, and not just the TLS blocks after it. This
449 can leave an unused alignment gap between the TCB and the TLS
450 blocks. */
451 result = (void *) roundup
452 (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
453 alignment);
455 /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
456 it. We can't ask the caller (i.e. libpthread) to do it, because
457 we will initialize the DTV et al. */
458 memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
459 #endif
461 /* Record the value of the original pointer for later
462 deallocation. */
463 *tcb_to_pointer_to_free_location (result) = allocated;
465 result = allocate_dtv (result);
466 if (result == NULL)
467 free (allocated);
468 return result;
472 #ifndef SHARED
473 extern dtv_t _dl_static_dtv[];
474 # define _dl_initial_dtv (&_dl_static_dtv[1])
475 #endif
477 static dtv_t *
478 _dl_resize_dtv (dtv_t *dtv, size_t max_modid)
480 /* Resize the dtv. */
481 dtv_t *newp;
482 size_t newsize = max_modid + DTV_SURPLUS;
483 size_t oldsize = dtv[-1].counter;
485 if (dtv == GL(dl_initial_dtv))
487 /* This is the initial dtv that was either statically allocated in
488 __libc_setup_tls or allocated during rtld startup using the
489 dl-minimal.c malloc instead of the real malloc. We can't free
490 it, we have to abandon the old storage. */
492 newp = malloc ((2 + newsize) * sizeof (dtv_t));
493 if (newp == NULL)
494 oom ();
495 memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
497 else
499 newp = realloc (&dtv[-1],
500 (2 + newsize) * sizeof (dtv_t));
501 if (newp == NULL)
502 oom ();
505 newp[0].counter = newsize;
507 /* Clear the newly allocated part. */
508 memset (newp + 2 + oldsize, '\0',
509 (newsize - oldsize) * sizeof (dtv_t));
511 /* Return the generation counter. */
512 return &newp[1];
516 /* Allocate initial TLS. RESULT should be a non-NULL pointer to storage
517 for the TLS space. The DTV may be resized, and so this function may
518 call malloc to allocate that space. The loader's GL(dl_load_tls_lock)
519 is taken when manipulating global TLS-related data in the loader. */
520 void *
521 _dl_allocate_tls_init (void *result, bool init_tls)
523 if (result == NULL)
524 /* The memory allocation failed. */
525 return NULL;
527 dtv_t *dtv = GET_DTV (result);
528 struct dtv_slotinfo_list *listp;
529 size_t total = 0;
530 size_t maxgen = 0;
532 /* Protects global dynamic TLS related state. */
533 __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
535 /* Check if the current dtv is big enough. */
536 if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
538 /* Resize the dtv. */
539 dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));
541 /* Install this new dtv in the thread data structures. */
542 INSTALL_DTV (result, &dtv[-1]);
545 /* We have to prepare the dtv for all currently loaded modules using
546 TLS. For those which are dynamically loaded we add the values
547 indicating deferred allocation. */
548 listp = GL(dl_tls_dtv_slotinfo_list);
549 while (1)
551 size_t cnt;
553 for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
555 struct link_map *map;
556 void *dest;
558 /* Check for the total number of used slots. */
559 if (total + cnt > GL(dl_tls_max_dtv_idx))
560 break;
562 map = listp->slotinfo[cnt].map;
563 if (map == NULL)
564 /* Unused entry. */
565 continue;
567 /* Keep track of the maximum generation number. This might
568 not be the generation counter. */
569 assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
570 maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
572 dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
573 dtv[map->l_tls_modid].pointer.to_free = NULL;
575 if (map->l_tls_offset == NO_TLS_OFFSET
576 || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
577 continue;
579 assert (map->l_tls_modid == total + cnt);
580 assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
581 #if TLS_TCB_AT_TP
582 assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
583 dest = (char *) result - map->l_tls_offset;
584 #elif TLS_DTV_AT_TP
585 dest = (char *) result + map->l_tls_offset;
586 #else
587 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
588 #endif
590 /* Set up the DTV entry. The simplified __tls_get_addr that
591 some platforms use in static programs requires it. */
592 dtv[map->l_tls_modid].pointer.val = dest;
594 /* Copy the initialization image and clear the BSS part. For
595 audit modules or dependencies with initial-exec TLS, we cannot
596 set the initial TLS image on default loader initialization
597 because it would already be set by the audit setup. However,
598 subsequent thread creation would need to follow the default
599 behaviour. */
600 if (map->l_ns != LM_ID_BASE && !init_tls)
601 continue;
602 memset (__mempcpy (dest, map->l_tls_initimage,
603 map->l_tls_initimage_size), '\0',
604 map->l_tls_blocksize - map->l_tls_initimage_size);
607 total += cnt;
608 if (total > GL(dl_tls_max_dtv_idx))
609 break;
611 listp = listp->next;
612 assert (listp != NULL);
614 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
616 /* The DTV version is up-to-date now. */
617 dtv[0].counter = maxgen;
619 return result;
621 rtld_hidden_def (_dl_allocate_tls_init)
623 void *
624 _dl_allocate_tls (void *mem)
626 return _dl_allocate_tls_init (mem == NULL
627 ? _dl_allocate_tls_storage ()
628 : allocate_dtv (mem), true);
630 rtld_hidden_def (_dl_allocate_tls)
633 void
634 _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
636 dtv_t *dtv = GET_DTV (tcb);
638 /* We need to free the memory allocated for non-static TLS. */
639 for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
640 free (dtv[1 + cnt].pointer.to_free);
642 /* The array starts with dtv[-1]. */
643 if (dtv != GL(dl_initial_dtv))
644 free (dtv - 1);
646 if (dealloc_tcb)
647 free (*tcb_to_pointer_to_free_location (tcb));
649 rtld_hidden_def (_dl_deallocate_tls)
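A minimal sketch of how these two entry points pair up, assuming a caller such as the
thread library; handle_failure is a hypothetical placeholder, and the real callers
live inside glibc itself rather than in application code:

  void *tcb = _dl_allocate_tls (NULL);   /* storage + DTV + TLS init images */
  if (tcb == NULL)
    handle_failure ();                   /* hypothetical error path */
  /* ... create and run a thread whose thread pointer is derived from tcb ... */
  _dl_deallocate_tls (tcb, true);        /* free dynamic TLS blocks, the DTV,
                                            and the static TLS/TCB storage */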
652 #ifdef SHARED
653 /* The __tls_get_addr function has two basic forms which differ in the
654 arguments. The IA-64 form takes two parameters, the module ID and
655 offset. The form used, among others, on IA-32 takes a reference to
656 a special structure which contains the same information. The second
657 form seems to be used more often (at the moment) so we default to
658 it. Users of the IA-64 form have to provide adequate definitions
659 of the following macros. */
660 # ifndef GET_ADDR_ARGS
661 # define GET_ADDR_ARGS tls_index *ti
662 # define GET_ADDR_PARAM ti
663 # endif
664 # ifndef GET_ADDR_MODULE
665 # define GET_ADDR_MODULE ti->ti_module
666 # endif
667 # ifndef GET_ADDR_OFFSET
668 # define GET_ADDR_OFFSET ti->ti_offset
669 # endif
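For orientation, the default (non-IA-64) argument is a pointer to a structure along
these lines; the authoritative definition is per-architecture in dl-tls.h and the
field types vary, so this is only an illustrative sketch:

  typedef struct
  {
    unsigned long int ti_module;   /* module ID, as assigned by _dl_assign_tls_modid */
    unsigned long int ti_offset;   /* offset of the symbol within that module's block */
  } tls_index;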
671 /* Allocate one DTV entry. */
672 static struct dtv_pointer
673 allocate_dtv_entry (size_t alignment, size_t size)
675 if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
677 /* The alignment is supported by malloc. */
678 void *ptr = malloc (size);
679 return (struct dtv_pointer) { ptr, ptr };
682 /* Emulate memalign by manually aligning a pointer returned by
683 malloc. First compute the size with an overflow check. */
684 size_t alloc_size = size + alignment;
685 if (alloc_size < size)
686 return (struct dtv_pointer) {};
688 /* Perform the allocation. This is the pointer we need to free
689 later. */
690 void *start = malloc (alloc_size);
691 if (start == NULL)
692 return (struct dtv_pointer) {};
694 /* Find the aligned position within the larger allocation. */
695 void *aligned = (void *) roundup ((uintptr_t) start, alignment);
697 return (struct dtv_pointer) { .val = aligned, .to_free = start };
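A worked example of the fallback path, with illustrative numbers only: for size == 64
and alignment == 64, which exceeds _Alignof (max_align_t) on typical targets,

  alloc_size = 64 + 64 = 128
  start      = malloc (128)               /* say it returns 0x55500028 */
  aligned    = roundup (0x55500028, 64)   /* = 0x55500040 */
  result     = { .val = 0x55500040, .to_free = 0x55500028 }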
700 static struct dtv_pointer
701 allocate_and_init (struct link_map *map)
703 struct dtv_pointer result = allocate_dtv_entry
704 (map->l_tls_align, map->l_tls_blocksize);
705 if (result.val == NULL)
706 oom ();
708 /* Initialize the memory. */
709 memset (__mempcpy (result.val, map->l_tls_initimage,
710 map->l_tls_initimage_size),
711 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
713 return result;
717 struct link_map *
718 _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
720 struct link_map *the_map = NULL;
721 dtv_t *dtv = THREAD_DTV ();
723 /* CONCURRENCY NOTES:
725 The global dl_tls_dtv_slotinfo_list array contains for each module
726 index the generation counter current when that entry was updated.
727 This array never shrinks so that all module indices which were
728 valid at some time can be used to access it. Concurrent loading
729 and unloading of modules can update slotinfo entries or extend
730 the array. The updates happen under the GL(dl_load_tls_lock) and
731 finish with the release store of the generation counter to
732 GL(dl_tls_generation) which is synchronized with the load of
733 new_gen in the caller. So updates up to new_gen are synchronized
734 but updates for later generations may not be.
736 Here we update the thread dtv from old_gen (== dtv[0].counter) to
737 new_gen generation. For this, each dtv[i] entry is either set to
738 an unallocated state (set), or left unmodified (nop). Where (set)
739 may resize the dtv first if modid i >= dtv[-1].counter. The rules
740 for the decision between (set) and (nop) are
742 (1) If slotinfo entry i is concurrently updated then either (set)
743 or (nop) is valid: TLS access cannot use dtv[i] unless it is
744 synchronized with a generation > new_gen.
746 Otherwise, if the generation of slotinfo entry i is gen and the
747 loaded module for this entry is map then
749 (2) If gen <= old_gen then do (nop).
751 (3) If old_gen < gen <= new_gen then
752 (3.1) if map != 0 then (set)
753 (3.2) if map == 0 then either (set) or (nop).
755 Note that (1) cannot be reliably detected, but since both actions
756 are valid it does not have to be. Only (2) and (3.1) cases need
757 to be distinguished for which relaxed mo access of gen and map is
758 enough: their value is synchronized when it matters.
760 Note that a relaxed mo load may give an out-of-thin-air value since
761 it is used in decisions that can affect concurrent stores. But this
762 should only happen if the OOTA value causes UB that justifies the
763 concurrent store of the value. This is not expected to be an issue
764 in practice. */
765 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
767 if (dtv[0].counter < new_gen)
769 size_t total = 0;
770 size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
771 assert (max_modid >= req_modid);
773 /* We have to look through the entire dtv slotinfo list. */
774 listp = GL(dl_tls_dtv_slotinfo_list);
777 for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
779 size_t modid = total + cnt;
781 /* Case (1) for all later modids. */
782 if (modid > max_modid)
783 break;
785 size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);
787 /* Case (1). */
788 if (gen > new_gen)
789 continue;
791 /* Case (2) or (1). */
792 if (gen <= dtv[0].counter)
793 continue;
795 /* Case (3) or (1). */
797 /* If there is no map this means the entry is empty. */
798 struct link_map *map
799 = atomic_load_relaxed (&listp->slotinfo[cnt].map);
800 /* Check whether the current dtv array is large enough. */
801 if (dtv[-1].counter < modid)
803 /* Case (3.2) or (1). */
804 if (map == NULL)
805 continue;
807 /* Resizing the dtv aborts on failure: bug 16134. */
808 dtv = _dl_resize_dtv (dtv, max_modid);
810 assert (modid <= dtv[-1].counter);
812 /* Install this new dtv in the thread data
813 structures. */
814 INSTALL_NEW_DTV (dtv);
817 /* If there is currently memory allocated for this
818 dtv entry, free it. Note: this is not AS-safe. */
819 /* XXX Ideally we will at some point create a memory
820 pool. */
821 free (dtv[modid].pointer.to_free);
822 dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
823 dtv[modid].pointer.to_free = NULL;
825 if (modid == req_modid)
826 the_map = map;
829 total += listp->len;
830 if (total > max_modid)
831 break;
833 /* Synchronize with _dl_add_to_slotinfo. Ideally this would
834 be consume MO since we only need to order the accesses to
835 the next node after the read of the address and on most
836 hardware (other than alpha) a normal load would do that
837 because of the address dependency. */
838 listp = atomic_load_acquire (&listp->next);
840 while (listp != NULL);
842 /* This will be the new maximum generation counter. */
843 dtv[0].counter = new_gen;
846 return the_map;
850 static void *
851 __attribute_noinline__
852 tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
854 /* The allocation was deferred. Do it now. */
855 if (the_map == NULL)
857 /* Find the link map for this module. */
858 size_t idx = GET_ADDR_MODULE;
859 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
861 while (idx >= listp->len)
863 idx -= listp->len;
864 listp = listp->next;
867 the_map = listp->slotinfo[idx].map;
870 /* Make sure that, if a dlopen running in parallel forces the
871 variable into static storage, we'll wait until the address in the
872 static TLS block is set up, and use that. If we're undecided
873 yet, make sure we make the decision holding the lock as well. */
874 if (__glibc_unlikely (the_map->l_tls_offset
875 != FORCED_DYNAMIC_TLS_OFFSET))
877 __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
878 if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
880 the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
881 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
883 else if (__glibc_likely (the_map->l_tls_offset
884 != FORCED_DYNAMIC_TLS_OFFSET))
886 #if TLS_TCB_AT_TP
887 void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
888 #elif TLS_DTV_AT_TP
889 void *p = (char *) THREAD_SELF + the_map->l_tls_offset + TLS_PRE_TCB_SIZE;
890 #else
891 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
892 #endif
893 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
895 dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
896 dtv[GET_ADDR_MODULE].pointer.val = p;
898 return (char *) p + GET_ADDR_OFFSET;
900 else
901 __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
903 struct dtv_pointer result = allocate_and_init (the_map);
904 dtv[GET_ADDR_MODULE].pointer = result;
905 assert (result.to_free != NULL);
907 return (char *) result.val + GET_ADDR_OFFSET;
911 static struct link_map *
912 __attribute_noinline__
913 update_get_addr (GET_ADDR_ARGS, size_t gen)
915 struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
916 dtv_t *dtv = THREAD_DTV ();
918 void *p = dtv[GET_ADDR_MODULE].pointer.val;
920 if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
921 return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);
923 return (void *) p + GET_ADDR_OFFSET;
926 /* For all machines that have a non-macro version of __tls_get_addr, we
927 want to use rtld_hidden_proto/rtld_hidden_def in order to call the
928 internal alias for __tls_get_addr from ld.so. This avoids a PLT entry
929 in ld.so for __tls_get_addr. */
931 #ifndef __tls_get_addr
932 extern void * __tls_get_addr (GET_ADDR_ARGS);
933 rtld_hidden_proto (__tls_get_addr)
934 rtld_hidden_def (__tls_get_addr)
935 #endif
937 /* The generic dynamic and local dynamic model cannot be used in
938 statically linked applications. */
939 void *
940 __tls_get_addr (GET_ADDR_ARGS)
942 dtv_t *dtv = THREAD_DTV ();
944 /* Update is needed if dtv[0].counter < the generation of the accessed
945 module, but the global generation counter is easier to check (which
946 must be synchronized up to the generation of the accessed module by
947 user code doing the TLS access so relaxed mo read is enough). */
948 size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
949 if (__glibc_unlikely (dtv[0].counter != gen))
951 /* Update DTV up to the global generation, see CONCURRENCY NOTES
952 in _dl_update_slotinfo. */
953 gen = atomic_load_acquire (&GL(dl_tls_generation));
954 return update_get_addr (GET_ADDR_PARAM, gen);
957 void *p = dtv[GET_ADDR_MODULE].pointer.val;
959 if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
960 return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
962 return (char *) p + GET_ADDR_OFFSET;
964 #endif
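To connect this with generated code: under the general-dynamic TLS model a compiler
lowers an access to a __thread variable in a shared object into, roughly, the call
sketched below. The tls_index object lives in the GOT and is filled in via
relocations (e.g. R_X86_64_DTPMOD64/R_X86_64_DTPOFF64 on x86-64); counter and
counter_tls_index are illustrative names only, not part of this file:

  extern __thread int counter;            /* the variable being accessed */
  extern tls_index counter_tls_index;     /* GOT-resident entry, relocated by ld.so */

  int *p = __tls_get_addr (&counter_tls_index);
  *p = 42;                                /* conceptually the same as counter = 42 */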
967 /* Look up the module's TLS block as for __tls_get_addr,
968 but never touch anything. Return null if it's not allocated yet. */
969 void *
970 _dl_tls_get_addr_soft (struct link_map *l)
972 if (__glibc_unlikely (l->l_tls_modid == 0))
973 /* This module has no TLS segment. */
974 return NULL;
976 dtv_t *dtv = THREAD_DTV ();
977 /* This may be called without holding the GL(dl_load_tls_lock). Reading
978 an arbitrary gen value is fine since this is best-effort code. */
979 size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
980 if (__glibc_unlikely (dtv[0].counter != gen))
982 /* This thread's DTV is not completely current,
983 but it might already cover this module. */
985 if (l->l_tls_modid >= dtv[-1].counter)
986 /* Nope. */
987 return NULL;
989 size_t idx = l->l_tls_modid;
990 struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
991 while (idx >= listp->len)
993 idx -= listp->len;
994 listp = listp->next;
997 /* We've reached the slot for this module.
998 If its generation counter is higher than the DTV's,
999 this thread does not know about this module yet. */
1000 if (dtv[0].counter < listp->slotinfo[idx].gen)
1001 return NULL;
1004 void *data = dtv[l->l_tls_modid].pointer.val;
1005 if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
1006 /* The DTV is current, but this thread has not yet needed
1007 to allocate this module's segment. */
1008 data = NULL;
1010 return data;
1014 void
1015 _dl_add_to_slotinfo (struct link_map *l, bool do_add)
1017 /* Now that we know the object is loaded successfully add
1018 modules containing TLS data to the dtv info table. We
1019 might have to increase its size. */
1020 struct dtv_slotinfo_list *listp;
1021 struct dtv_slotinfo_list *prevp;
1022 size_t idx = l->l_tls_modid;
1024 /* Find the place in the dtv slotinfo list. */
1025 listp = GL(dl_tls_dtv_slotinfo_list);
1026 prevp = NULL; /* Needed to shut up gcc. */
1029 /* Does it fit in the array of this list element? */
1030 if (idx < listp->len)
1031 break;
1032 idx -= listp->len;
1033 prevp = listp;
1034 listp = listp->next;
1036 while (listp != NULL);
1038 if (listp == NULL)
1040 /* When we come here it means we have to add a new element
1041 to the slotinfo list. And the new module must be in
1042 the first slot. */
1043 assert (idx == 0);
1045 listp = (struct dtv_slotinfo_list *)
1046 malloc (sizeof (struct dtv_slotinfo_list)
1047 + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1048 if (listp == NULL)
1050 /* We ran out of memory while resizing the dtv slotinfo list. */
1051 _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
1052 cannot create TLS data structures"));
1055 listp->len = TLS_SLOTINFO_SURPLUS;
1056 listp->next = NULL;
1057 memset (listp->slotinfo, '\0',
1058 TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
1059 /* Synchronize with _dl_update_slotinfo. */
1060 atomic_store_release (&prevp->next, listp);
1063 /* Add the information into the slotinfo data structure. */
1064 if (do_add)
1066 /* Can be read concurrently. See _dl_update_slotinfo. */
1067 atomic_store_relaxed (&listp->slotinfo[idx].map, l);
1068 atomic_store_relaxed (&listp->slotinfo[idx].gen,
1069 GL(dl_tls_generation) + 1);
1073 #if PTHREAD_IN_LIBC
1074 static inline void __attribute__((always_inline))
1075 init_one_static_tls (struct pthread *curp, struct link_map *map)
1077 # if TLS_TCB_AT_TP
1078 void *dest = (char *) curp - map->l_tls_offset;
1079 # elif TLS_DTV_AT_TP
1080 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1081 # else
1082 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1083 # endif
1085 /* Initialize the memory. */
1086 memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1087 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1090 void
1091 _dl_init_static_tls (struct link_map *map)
1093 lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1095 /* Iterate over the list with system-allocated threads first. */
1096 list_t *runp;
1097 list_for_each (runp, &GL (dl_stack_used))
1098 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1100 /* Now the list with threads using user-allocated stacks. */
1101 list_for_each (runp, &GL (dl_stack_user))
1102 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1104 lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
1106 #endif /* PTHREAD_IN_LIBC */