AArch64: List official cores before codenames
[official-gcc.git] / libgomp / team.c
blobb4fd6f2704c394fab6bd7ffdd0f08267608de42c
1 /* Copyright (C) 2005-2023 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of threads in response to team
27 creation and termination. */
29 #include "libgomp.h"
30 #include "pool.h"
31 #include <stdlib.h>
32 #include <string.h>
34 #ifdef LIBGOMP_USE_PTHREADS
/* Default attributes for team threads: gomp_team_start passes a pointer
   to this object to pthread_create unless a places list forces a
   per-call copy. */
35 pthread_attr_t gomp_thread_attr;
37 /* This key is for the thread destructor. */
/* It is created in initialize_team with gomp_free_thread as its
   destructor, and gomp_new_icv stores the calling thread's gomp_thread
   pointer under it. */
38 pthread_key_t gomp_thread_destructor;
41 /* This is the libgomp per-thread data structure. */
/* With HAVE_TLS or USE_EMUTLS it is a __thread object; otherwise each
   thread's struct gomp_thread is registered under gomp_tls_key with
   pthread_setspecific. */
42 #if defined HAVE_TLS || defined USE_EMUTLS
43 __thread struct gomp_thread gomp_tls_data;
44 #else
45 pthread_key_t gomp_tls_key;
46 #endif
49 /* This structure is used to communicate across pthread_create. */
/* gomp_team_start fills in one record per thread it creates and passes
   it as the pthread_create argument; gomp_thread_start copies the
   fields into the new thread's struct gomp_thread. */
51 struct gomp_thread_start_data
/* Function the new thread calls first, and the argument passed to it. */
53 void (*fn) (void *);
54 void *fn_data;
/* Initial team state, copied into the new thread's ts. */
55 struct gomp_team_state ts;
/* Implicit task assigned to the new thread. */
56 struct gomp_task *task;
/* Thread pool the new thread records as its own. */
57 struct gomp_thread_pool *thread_pool;
/* Place value copied to the new thread; gomp_team_start stores 0 or p + 1. */
58 unsigned int place;
/* num_teams and team_num copied from the creating thread. */
59 unsigned int num_teams;
60 unsigned int team_num;
/* True when the team is nested: the thread then runs FN once instead of
   docking in the pool. */
61 bool nested;
/* Written by pthread_create; copied into the new thread's handle field
   when GOMP_NEEDS_THREAD_HANDLE is defined. */
62 pthread_t handle;
66 /* This function is a pthread_create entry point. This contains the idle
67 loop in which a thread waits to be called up to become part of a team. */
69 static void *
70 gomp_thread_start (void *xdata)
72 struct gomp_thread_start_data *data = xdata;
73 struct gomp_thread *thr;
74 struct gomp_thread_pool *pool;
75 void (*local_fn) (void *);
76 void *local_data;
78 #if defined HAVE_TLS || defined USE_EMUTLS
79 thr = &gomp_tls_data;
80 #else
81 struct gomp_thread local_thr;
82 thr = &local_thr;
83 #endif
84 gomp_sem_init (&thr->release, 0);
86 /* Extract what we need from data. */
87 local_fn = data->fn;
88 local_data = data->fn_data;
89 thr->thread_pool = data->thread_pool;
90 thr->ts = data->ts;
91 thr->task = data->task;
92 thr->place = data->place;
93 thr->num_teams = data->num_teams;
94 thr->team_num = data->team_num;
95 #ifdef GOMP_NEEDS_THREAD_HANDLE
96 thr->handle = data->handle;
97 #endif
98 #if !(defined HAVE_TLS || defined USE_EMUTLS)
99 pthread_setspecific (gomp_tls_key, thr);
100 #endif
102 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
104 /* Make thread pool local. */
105 pool = thr->thread_pool;
107 if (data->nested)
109 struct gomp_team *team = thr->ts.team;
110 struct gomp_task *task = thr->task;
112 gomp_barrier_wait (&team->barrier);
114 local_fn (local_data);
115 gomp_team_barrier_wait_final (&team->barrier);
116 gomp_finish_task (task);
117 gomp_barrier_wait_last (&team->barrier);
119 else
121 pool->threads[thr->ts.team_id] = thr;
123 gomp_simple_barrier_wait (&pool->threads_dock);
126 struct gomp_team *team = thr->ts.team;
127 struct gomp_task *task = thr->task;
129 local_fn (local_data);
130 gomp_team_barrier_wait_final (&team->barrier);
131 gomp_finish_task (task);
133 gomp_simple_barrier_wait (&pool->threads_dock);
135 local_fn = thr->fn;
136 local_data = thr->data;
137 thr->fn = NULL;
139 while (local_fn);
142 gomp_sem_destroy (&thr->release);
143 pthread_detach (pthread_self ());
144 thr->thread_pool = NULL;
145 thr->task = NULL;
146 return NULL;
148 #endif
150 static inline struct gomp_team *
151 get_last_team (unsigned nthreads)
153 struct gomp_thread *thr = gomp_thread ();
154 if (thr->ts.team == NULL)
156 struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
157 struct gomp_team *last_team = pool->last_team;
158 if (last_team != NULL && last_team->nthreads == nthreads)
160 pool->last_team = NULL;
161 return last_team;
164 return NULL;
167 /* Create a new team data structure. */
169 struct gomp_team *
170 gomp_new_team (unsigned nthreads)
172 struct gomp_team *team;
173 int i;
175 team = get_last_team (nthreads);
176 if (team == NULL)
178 size_t extra = sizeof (team->ordered_release[0])
179 + sizeof (team->implicit_task[0]);
180 #ifdef GOMP_USE_ALIGNED_WORK_SHARES
181 team = gomp_aligned_alloc (__alignof (struct gomp_team),
182 sizeof (*team) + nthreads * extra);
183 #else
184 team = team_malloc (sizeof (*team) + nthreads * extra);
185 #endif
187 #ifndef HAVE_SYNC_BUILTINS
188 gomp_mutex_init (&team->work_share_list_free_lock);
189 #endif
190 gomp_barrier_init (&team->barrier, nthreads);
191 gomp_mutex_init (&team->task_lock);
193 team->nthreads = nthreads;
196 team->work_share_chunk = 8;
197 #ifdef HAVE_SYNC_BUILTINS
198 team->single_count = 0;
199 #endif
200 team->work_shares_to_free = &team->work_shares[0];
201 gomp_init_work_share (&team->work_shares[0], 0, nthreads);
202 team->work_shares[0].next_alloc = NULL;
203 team->work_share_list_free = NULL;
204 team->work_share_list_alloc = &team->work_shares[1];
205 for (i = 1; i < 7; i++)
206 team->work_shares[i].next_free = &team->work_shares[i + 1];
207 team->work_shares[i].next_free = NULL;
209 gomp_sem_init (&team->master_release, 0);
210 team->ordered_release = (void *) &team->implicit_task[nthreads];
211 team->ordered_release[0] = &team->master_release;
213 priority_queue_init (&team->task_queue);
214 team->task_count = 0;
215 team->task_queued_count = 0;
216 team->task_running_count = 0;
217 team->work_share_cancelled = 0;
218 team->team_cancelled = 0;
220 team->task_detach_count = 0;
222 return team;
226 /* Free a team data structure. */
228 static void
229 free_team (struct gomp_team *team)
231 #ifndef HAVE_SYNC_BUILTINS
232 gomp_mutex_destroy (&team->work_share_list_free_lock);
233 #endif
234 gomp_barrier_destroy (&team->barrier);
235 gomp_mutex_destroy (&team->task_lock);
236 priority_queue_free (&team->task_queue);
237 team_free (team);
240 static void
241 gomp_free_pool_helper (void *thread_pool)
243 struct gomp_thread *thr = gomp_thread ();
244 struct gomp_thread_pool *pool
245 = (struct gomp_thread_pool *) thread_pool;
246 gomp_simple_barrier_wait_last (&pool->threads_dock);
247 gomp_sem_destroy (&thr->release);
248 thr->thread_pool = NULL;
249 thr->task = NULL;
250 #ifdef LIBGOMP_USE_PTHREADS
251 pthread_detach (pthread_self ());
252 pthread_exit (NULL);
253 #elif defined(__nvptx__)
254 asm ("exit;");
255 #elif defined(__AMDGCN__)
256 asm ("s_dcache_wb\n\t"
257 "s_endpgm");
258 #else
259 #error gomp_free_pool_helper must terminate the thread
260 #endif
263 /* Free a thread pool and release its threads. */
265 void
266 gomp_free_thread (void *arg __attribute__((unused)))
268 struct gomp_thread *thr = gomp_thread ();
269 struct gomp_thread_pool *pool = thr->thread_pool;
270 if (pool)
272 if (pool->threads_used > 0)
274 int i;
275 for (i = 1; i < pool->threads_used; i++)
277 struct gomp_thread *nthr = pool->threads[i];
278 nthr->fn = gomp_free_pool_helper;
279 nthr->data = pool;
281 /* This barrier undocks threads docked on pool->threads_dock. */
282 gomp_simple_barrier_wait (&pool->threads_dock);
283 /* And this waits till all threads have called gomp_barrier_wait_last
284 in gomp_free_pool_helper. */
285 gomp_simple_barrier_wait (&pool->threads_dock);
286 /* Now it is safe to destroy the barrier and free the pool. */
287 gomp_simple_barrier_destroy (&pool->threads_dock);
289 #ifdef HAVE_SYNC_BUILTINS
290 __sync_fetch_and_add (&gomp_managed_threads,
291 1L - pool->threads_used);
292 #else
293 gomp_mutex_lock (&gomp_managed_threads_lock);
294 gomp_managed_threads -= pool->threads_used - 1L;
295 gomp_mutex_unlock (&gomp_managed_threads_lock);
296 #endif
298 if (pool->last_team)
299 free_team (pool->last_team);
300 #ifndef __nvptx__
301 team_free (pool->threads);
302 team_free (pool);
303 #endif
304 thr->thread_pool = NULL;
306 if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
307 gomp_team_end ();
308 if (thr->task != NULL)
310 struct gomp_task *task = thr->task;
311 gomp_end_task ();
312 free (task);
316 /* Launch a team. */
318 #ifdef LIBGOMP_USE_PTHREADS
/* Make the calling thread member 0 of TEAM and bring up the remaining
   NTHREADS - 1 members, each of which runs FN (DATA).

   The low three bits of FLAGS are read as a proc_bind value.  TASKGROUP
   is stored in every member's implicit task.  The caller's previous
   team state is saved in TEAM->prev_ts.

   With NTHREADS == 1 the function returns right after setting up the
   caller.  Otherwise, for a non-nested team, threads already in the
   pool are handed FN/DATA first, and any members still missing are
   created with pthread_create (gomp_fatal on failure).  The function
   finishes by waiting on TEAM->barrier (nested) or on the pool's dock
   barrier (non-nested), and adjusts the dock threshold and
   gomp_managed_threads afterwards when surplus threads were released. */
319 void
320 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
321 unsigned flags, struct gomp_team *team,
322 struct gomp_taskgroup *taskgroup)
324 struct gomp_thread_start_data *start_data = NULL;
325 struct gomp_thread *thr, *nthr;
326 struct gomp_task *task;
327 struct gomp_task_icv *icv;
328 bool nested;
329 struct gomp_thread_pool *pool;
330 unsigned i, n, old_threads_used = 0;
331 pthread_attr_t thread_attr, *attr;
332 unsigned long nthreads_var;
333 char bind, bind_var;
334 unsigned int s = 0, rest = 0, p = 0, k = 0;
335 unsigned int affinity_count = 0;
336 struct gomp_thread **affinity_thr = NULL;
337 bool force_display = false;
339 thr = gomp_thread ();
/* NESTED is true when the caller is already at a non-zero level. */
340 nested = thr->ts.level;
341 pool = thr->thread_pool;
342 task = thr->task;
343 icv = task ? &task->icv : &gomp_global_icv;
344 if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
346 gomp_init_affinity ();
347 if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
348 gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
349 thr->place);
352 /* Always save the previous state, even if this isn't a nested team.
353 In particular, we should save any work share state from an outer
354 orphaned work share construct. */
355 team->prev_ts = thr->ts;
357 thr->ts.team = team;
358 thr->ts.team_id = 0;
359 ++thr->ts.level;
360 if (nthreads > 1)
361 ++thr->ts.active_level;
362 thr->ts.work_share = &team->work_shares[0];
363 thr->ts.last_work_share = NULL;
364 #ifdef HAVE_SYNC_BUILTINS
365 thr->ts.single_count = 0;
366 #endif
367 thr->ts.static_trip = 0;
368 thr->task = &team->implicit_task[0];
369 #ifdef GOMP_NEEDS_THREAD_HANDLE
370 thr->handle = pthread_self ();
371 #endif
372 nthreads_var = icv->nthreads_var;
373 if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
374 && thr->ts.level < gomp_nthreads_var_list_len)
375 nthreads_var = gomp_nthreads_var_list[thr->ts.level];
376 bind_var = icv->bind_var;
/* The low three bits of FLAGS replace the ICV value unless either of
   them is omp_proc_bind_false. */
377 if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
378 bind_var = flags & 7;
/* BIND is the policy used for this team; BIND_VAR, possibly replaced
   from gomp_bind_var_list below, is what the implicit tasks get. */
379 bind = bind_var;
380 if (__builtin_expect (gomp_bind_var_list != NULL, 0)
381 && thr->ts.level < gomp_bind_var_list_len)
382 bind_var = gomp_bind_var_list[thr->ts.level];
383 gomp_init_task (thr->task, task, icv);
384 thr->task->taskgroup = taskgroup;
385 team->implicit_task[0].icv.nthreads_var = nthreads_var;
386 team->implicit_task[0].icv.bind_var = bind_var;
/* A team of one has no other members to release or create. */
388 if (nthreads == 1)
389 return;
391 i = 1;
393 if (__builtin_expect (gomp_places_list != NULL, 0))
395 /* Depending on chosen proc_bind model, set subpartition
396 for the master thread and initialize helper variables
397 P and optionally S, K and/or REST used by later place
398 computation for each additional thread. */
399 p = thr->place - 1;
400 switch (bind)
402 case omp_proc_bind_true:
403 case omp_proc_bind_close:
404 if (nthreads > thr->ts.place_partition_len)
406 /* T > P. S threads will be placed in each place,
407 and the final REST threads placed one by one
408 into the already occupied places. */
409 s = nthreads / thr->ts.place_partition_len;
410 rest = nthreads % thr->ts.place_partition_len;
412 else
413 s = 1;
414 k = 1;
415 break;
416 case omp_proc_bind_master:
417 /* Each thread will be bound to master's place. */
418 break;
419 case omp_proc_bind_spread:
420 if (nthreads <= thr->ts.place_partition_len)
422 /* T <= P. Each subpartition will have in between s
423 and s+1 places (subpartitions starting at or
424 after rest will have s places, earlier s+1 places),
425 each thread will be bound to the first place in
426 its subpartition (except for the master thread
427 that can be bound to another place in its
428 subpartition). */
429 s = thr->ts.place_partition_len / nthreads;
430 rest = thr->ts.place_partition_len % nthreads;
431 rest = (s + 1) * rest + thr->ts.place_partition_off;
432 if (p < rest)
434 p -= (p - thr->ts.place_partition_off) % (s + 1);
435 thr->ts.place_partition_len = s + 1;
437 else
439 p -= (p - rest) % s;
440 thr->ts.place_partition_len = s;
442 thr->ts.place_partition_off = p;
444 else
446 /* T > P. Each subpartition will have just a single
447 place and we'll place between s and s+1
448 threads into each subpartition. */
449 s = nthreads / thr->ts.place_partition_len;
450 rest = nthreads % thr->ts.place_partition_len;
451 thr->ts.place_partition_off = p;
452 thr->ts.place_partition_len = 1;
453 k = 1;
455 break;
458 else
459 bind = omp_proc_bind_false;
461 /* We only allow the reuse of idle threads for non-nested PARALLEL
462 regions. This appears to be implied by the semantics of
463 threadprivate variables, but perhaps that's reading too much into
464 things. Certainly it does prevent any locking problems, since
465 only the initial program thread will modify gomp_threads. */
466 if (!nested)
468 old_threads_used = pool->threads_used;
/* N is the number of leading pool slots, slot 0 included, that the
   release loop below may reuse. */
470 if (nthreads <= old_threads_used)
471 n = nthreads;
472 else if (old_threads_used == 0)
474 n = 0;
475 gomp_simple_barrier_init (&pool->threads_dock, nthreads);
477 else
479 n = old_threads_used;
481 /* Increase the barrier threshold to make sure all new
482 threads arrive before the team is released. */
483 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
486 /* Not true yet, but soon will be. We're going to release all
487 threads from the dock, and those that aren't part of the
488 team will exit. */
489 pool->threads_used = nthreads;
491 /* If necessary, expand the size of the gomp_threads array. It is
492 expected that changes in the number of threads are rare, thus we
493 make no effort to expand gomp_threads_size geometrically. */
494 if (nthreads >= pool->threads_size)
496 pool->threads_size = nthreads + 1;
497 pool->threads
498 = gomp_realloc (pool->threads,
499 pool->threads_size
500 * sizeof (struct gomp_thread *));
501 /* Add current (master) thread to threads[]. */
502 pool->threads[0] = thr;
505 /* Release existing idle threads. */
506 for (; i < n; ++i)
508 unsigned int place_partition_off = thr->ts.place_partition_off;
509 unsigned int place_partition_len = thr->ts.place_partition_len;
510 unsigned int place = 0;
511 if (__builtin_expect (gomp_places_list != NULL, 0))
513 switch (bind)
515 case omp_proc_bind_true:
516 case omp_proc_bind_close:
517 if (k == s)
519 ++p;
520 if (p == (team->prev_ts.place_partition_off
521 + team->prev_ts.place_partition_len))
522 p = team->prev_ts.place_partition_off;
523 k = 1;
524 if (i == nthreads - rest)
525 s = 1;
527 else
528 ++k;
529 break;
530 case omp_proc_bind_master:
531 break;
532 case omp_proc_bind_spread:
533 if (k == 0)
535 /* T <= P. */
536 if (p < rest)
537 p += s + 1;
538 else
539 p += s;
540 if (p == (team->prev_ts.place_partition_off
541 + team->prev_ts.place_partition_len))
542 p = team->prev_ts.place_partition_off;
543 place_partition_off = p;
544 if (p < rest)
545 place_partition_len = s + 1;
546 else
547 place_partition_len = s;
549 else
551 /* T > P. */
552 if (k == s)
554 ++p;
555 if (p == (team->prev_ts.place_partition_off
556 + team->prev_ts.place_partition_len))
557 p = team->prev_ts.place_partition_off;
558 k = 1;
559 if (i == nthreads - rest)
560 s = 1;
562 else
563 ++k;
564 place_partition_off = p;
565 place_partition_len = 1;
567 break;
/* Taken when the thread in slot I is not at the place computed above
   (or outside the computed subpartition), or when the pool has already
   been re-sorted by place on an earlier iteration. */
569 if (affinity_thr != NULL
570 || (bind != omp_proc_bind_true
571 && pool->threads[i]->place != p + 1)
572 || pool->threads[i]->place <= place_partition_off
573 || pool->threads[i]->place > (place_partition_off
574 + place_partition_len))
576 unsigned int l;
577 force_display = true;
578 if (affinity_thr == NULL)
580 unsigned int j;
/* First mismatch: build AFFINITY_THR, one list head per place of the
   previous partition.  It lives on the heap above 64 places and on the
   stack otherwise. */
582 if (team->prev_ts.place_partition_len > 64)
583 affinity_thr
584 = gomp_malloc (team->prev_ts.place_partition_len
585 * sizeof (struct gomp_thread *));
586 else
587 affinity_thr
588 = gomp_alloca (team->prev_ts.place_partition_len
589 * sizeof (struct gomp_thread *));
590 memset (affinity_thr, '\0',
591 team->prev_ts.place_partition_len
592 * sizeof (struct gomp_thread *));
/* Move the remaining old pool threads into the per-place lists, linked
   through their DATA field, and empty their pool slots. */
593 for (j = i; j < old_threads_used; j++)
595 if (pool->threads[j]->place
596 > team->prev_ts.place_partition_off
597 && (pool->threads[j]->place
598 <= (team->prev_ts.place_partition_off
599 + team->prev_ts.place_partition_len)))
601 l = pool->threads[j]->place - 1
602 - team->prev_ts.place_partition_off;
603 pool->threads[j]->data = affinity_thr[l];
604 affinity_thr[l] = pool->threads[j];
606 pool->threads[j] = NULL;
608 if (nthreads > old_threads_used)
609 memset (&pool->threads[old_threads_used],
610 '\0', ((nthreads - old_threads_used)
611 * sizeof (struct gomp_thread *)));
612 n = nthreads;
613 affinity_count = old_threads_used - i;
615 if (affinity_count == 0)
616 break;
617 l = p;
618 if (affinity_thr[l - team->prev_ts.place_partition_off]
619 == NULL)
621 if (bind != omp_proc_bind_true)
622 continue;
623 for (l = place_partition_off;
624 l < place_partition_off + place_partition_len;
625 l++)
626 if (affinity_thr[l - team->prev_ts.place_partition_off]
627 != NULL)
628 break;
629 if (l == place_partition_off + place_partition_len)
630 continue;
/* Pop one thread from the list for place L and put it in slot I. */
632 nthr = affinity_thr[l - team->prev_ts.place_partition_off];
633 affinity_thr[l - team->prev_ts.place_partition_off]
634 = (struct gomp_thread *) nthr->data;
635 affinity_count--;
636 pool->threads[i] = nthr;
638 else
639 nthr = pool->threads[i];
640 place = p + 1;
642 else
643 nthr = pool->threads[i];
/* Give the reused thread its team state, implicit task and work. */
644 nthr->ts.team = team;
645 nthr->ts.work_share = &team->work_shares[0];
646 nthr->ts.last_work_share = NULL;
647 nthr->ts.team_id = i;
648 nthr->ts.level = team->prev_ts.level + 1;
649 nthr->ts.active_level = thr->ts.active_level;
650 nthr->ts.place_partition_off = place_partition_off;
651 nthr->ts.place_partition_len = place_partition_len;
652 nthr->ts.def_allocator = thr->ts.def_allocator;
653 #ifdef HAVE_SYNC_BUILTINS
654 nthr->ts.single_count = 0;
655 #endif
656 nthr->ts.static_trip = 0;
657 nthr->num_teams = thr->num_teams;
658 nthr->team_num = thr->team_num;
659 nthr->task = &team->implicit_task[i];
660 nthr->place = place;
661 gomp_init_task (nthr->task, task, icv);
662 team->implicit_task[i].icv.nthreads_var = nthreads_var;
663 team->implicit_task[i].icv.bind_var = bind_var;
664 nthr->task->taskgroup = taskgroup;
665 nthr->fn = fn;
666 nthr->data = data;
667 team->ordered_release[i] = &nthr->release;
670 if (__builtin_expect (affinity_thr != NULL, 0))
672 /* If AFFINITY_THR is non-NULL just because we had to
673 permute some threads in the pool, but we've managed
674 to find exactly as many old threads as we'd find
675 without affinity, we don't need to handle this
676 specially anymore. */
677 if (nthreads <= old_threads_used
678 ? (affinity_count == old_threads_used - nthreads)
679 : (i == old_threads_used))
681 if (team->prev_ts.place_partition_len > 64)
682 free (affinity_thr);
683 affinity_thr = NULL;
684 affinity_count = 0;
686 else
688 i = 1;
689 /* We are going to compute the places/subpartitions
690 again from the beginning. So, we need to reinitialize
691 vars modified by the switch (bind) above inside
692 of the loop, to the state they had after the initial
693 switch (bind). */
694 switch (bind)
696 case omp_proc_bind_true:
697 case omp_proc_bind_close:
698 if (nthreads > thr->ts.place_partition_len)
699 /* T > P. S has been changed, so needs
700 to be recomputed. */
701 s = nthreads / thr->ts.place_partition_len;
702 k = 1;
703 p = thr->place - 1;
704 break;
705 case omp_proc_bind_master:
706 /* No vars have been changed. */
707 break;
708 case omp_proc_bind_spread:
709 p = thr->ts.place_partition_off;
710 if (k != 0)
712 /* T > P. */
713 s = nthreads / team->prev_ts.place_partition_len;
714 k = 1;
716 break;
719 /* Increase the barrier threshold to make sure all new
720 threads and all the threads we're going to let die
721 arrive before the team is released. */
722 if (affinity_count)
723 gomp_simple_barrier_reinit (&pool->threads_dock,
724 nthreads + affinity_count);
/* Every team member came from the pool, so no thread has to be
   created. */
728 if (i == nthreads)
729 goto do_release;
733 if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
735 long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
737 if (old_threads_used == 0)
738 --diff;
740 #ifdef HAVE_SYNC_BUILTINS
741 __sync_fetch_and_add (&gomp_managed_threads, diff);
742 #else
743 gomp_mutex_lock (&gomp_managed_threads_lock);
744 gomp_managed_threads += diff;
745 gomp_mutex_unlock (&gomp_managed_threads_lock);
746 #endif
749 attr = &gomp_thread_attr;
/* With a places list, work on a private attribute object that starts
   with the stack size of gomp_thread_attr. */
750 if (__builtin_expect (gomp_places_list != NULL, 0))
752 size_t stacksize;
753 pthread_attr_init (&thread_attr);
754 if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
755 pthread_attr_setstacksize (&thread_attr, stacksize);
756 attr = &thread_attr;
759 if (i >= nthreads)
760 __builtin_unreachable ();
/* One start record per remaining team slot, allocated on this
   function's stack. */
761 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
762 * (nthreads - i));
764 /* Launch new threads. */
765 for (; i < nthreads; ++i)
767 int err;
769 start_data->ts.place_partition_off = thr->ts.place_partition_off;
770 start_data->ts.place_partition_len = thr->ts.place_partition_len;
771 start_data->place = 0;
772 if (__builtin_expect (gomp_places_list != NULL, 0))
774 switch (bind)
776 case omp_proc_bind_true:
777 case omp_proc_bind_close:
778 if (k == s)
780 ++p;
781 if (p == (team->prev_ts.place_partition_off
782 + team->prev_ts.place_partition_len))
783 p = team->prev_ts.place_partition_off;
784 k = 1;
785 if (i == nthreads - rest)
786 s = 1;
788 else
789 ++k;
790 break;
791 case omp_proc_bind_master:
792 break;
793 case omp_proc_bind_spread:
794 if (k == 0)
796 /* T <= P. */
797 if (p < rest)
798 p += s + 1;
799 else
800 p += s;
801 if (p == (team->prev_ts.place_partition_off
802 + team->prev_ts.place_partition_len))
803 p = team->prev_ts.place_partition_off;
804 start_data->ts.place_partition_off = p;
805 if (p < rest)
806 start_data->ts.place_partition_len = s + 1;
807 else
808 start_data->ts.place_partition_len = s;
810 else
812 /* T > P. */
813 if (k == s)
815 ++p;
816 if (p == (team->prev_ts.place_partition_off
817 + team->prev_ts.place_partition_len))
818 p = team->prev_ts.place_partition_off;
819 k = 1;
820 if (i == nthreads - rest)
821 s = 1;
823 else
824 ++k;
825 start_data->ts.place_partition_off = p;
826 start_data->ts.place_partition_len = 1;
828 break;
830 start_data->place = p + 1;
/* Slot I was already filled with a reused pool thread by the release
   loop; nothing to create for it. */
831 if (affinity_thr != NULL && pool->threads[i] != NULL)
832 continue;
833 gomp_init_thread_affinity (attr, p);
836 start_data->fn = fn;
837 start_data->fn_data = data;
838 start_data->ts.team = team;
839 start_data->ts.work_share = &team->work_shares[0];
840 start_data->ts.last_work_share = NULL;
841 start_data->ts.team_id = i;
842 start_data->ts.level = team->prev_ts.level + 1;
843 start_data->ts.active_level = thr->ts.active_level;
844 start_data->ts.def_allocator = thr->ts.def_allocator;
845 #ifdef HAVE_SYNC_BUILTINS
846 start_data->ts.single_count = 0;
847 #endif
848 start_data->ts.static_trip = 0;
849 start_data->num_teams = thr->num_teams;
850 start_data->team_num = thr->team_num;
851 start_data->task = &team->implicit_task[i];
852 gomp_init_task (start_data->task, task, icv);
853 team->implicit_task[i].icv.nthreads_var = nthreads_var;
854 team->implicit_task[i].icv.bind_var = bind_var;
855 start_data->task->taskgroup = taskgroup;
856 start_data->thread_pool = pool;
857 start_data->nested = nested;
859 attr = gomp_adjust_thread_attr (attr, &thread_attr);
860 err = pthread_create (&start_data->handle, attr, gomp_thread_start,
861 start_data);
/* The record pointer is advanced before the error check; each created
   thread keeps its own record. */
862 start_data++;
863 if (err != 0)
864 gomp_fatal ("Thread creation failed: %s", strerror (err));
867 if (__builtin_expect (attr == &thread_attr, 0))
868 pthread_attr_destroy (&thread_attr);
870 do_release:
871 if (nested)
872 gomp_barrier_wait (&team->barrier);
873 else
874 gomp_simple_barrier_wait (&pool->threads_dock);
876 /* Decrease the barrier threshold to match the number of threads
877 that should arrive back at the end of this team. The extra
878 threads should be exiting. Note that we arrange for this test
879 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
880 the barrier as well as gomp_managed_threads was temporarily
881 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_USED,
882 AFFINITY_COUNT if non-zero will be always at least
883 OLD_THREADS_USED - NTHREADS. */
884 if (__builtin_expect (nthreads < old_threads_used, 0)
885 || __builtin_expect (affinity_count, 0))
887 long diff = (long) nthreads - (long) old_threads_used;
889 if (affinity_count)
890 diff = -affinity_count;
892 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
894 #ifdef HAVE_SYNC_BUILTINS
895 __sync_fetch_and_add (&gomp_managed_threads, diff);
896 #else
897 gomp_mutex_lock (&gomp_managed_threads_lock);
898 gomp_managed_threads += diff;
899 gomp_mutex_unlock (&gomp_managed_threads_lock);
900 #endif
902 if (__builtin_expect (gomp_display_affinity_var, 0))
904 if (nested
905 || nthreads != old_threads_used
906 || force_display)
908 gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
909 thr->place);
910 if (nested)
/* START_DATA was advanced once per created thread; step back to the
   first record. */
912 start_data -= nthreads - 1;
913 for (i = 1; i < nthreads; ++i)
915 gomp_display_affinity_thread (
916 #ifdef LIBGOMP_USE_PTHREADS
917 start_data->handle,
918 #else
919 gomp_thread_self (),
920 #endif
921 &start_data->ts,
922 start_data->place);
923 start_data++;
926 else
928 for (i = 1; i < nthreads; ++i)
930 gomp_thread_handle handle
931 = gomp_thread_to_pthread_t (pool->threads[i]);
932 gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
933 pool->threads[i]->place);
/* Only the heap-allocated variant of AFFINITY_THR needs freeing. */
938 if (__builtin_expect (affinity_thr != NULL, 0)
939 && team->prev_ts.place_partition_len > 64)
940 free (affinity_thr);
942 #endif
945 /* Terminate the current team. This is only to be called by the master
946 thread. We assume that we must wait for the other threads. */
948 void
949 gomp_team_end (void)
951 struct gomp_thread *thr = gomp_thread ();
952 struct gomp_team *team = thr->ts.team;
954 /* This barrier handles all pending explicit threads.
955 As #pragma omp cancel parallel might get awaited count in
956 team->barrier in a inconsistent state, we need to use a different
957 counter here. */
958 gomp_team_barrier_wait_final (&team->barrier);
959 if (__builtin_expect (team->team_cancelled, 0))
961 struct gomp_work_share *ws = team->work_shares_to_free;
964 struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
965 if (next_ws == NULL)
966 gomp_ptrlock_set (&ws->next_ws, ws);
967 gomp_fini_work_share (ws);
968 ws = next_ws;
970 while (ws != NULL);
972 else
973 gomp_fini_work_share (thr->ts.work_share);
975 gomp_end_task ();
976 thr->ts = team->prev_ts;
978 if (__builtin_expect (thr->ts.level != 0, 0))
980 #ifdef HAVE_SYNC_BUILTINS
981 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
982 #else
983 gomp_mutex_lock (&gomp_managed_threads_lock);
984 gomp_managed_threads -= team->nthreads - 1L;
985 gomp_mutex_unlock (&gomp_managed_threads_lock);
986 #endif
987 /* This barrier has gomp_barrier_wait_last counterparts
988 and ensures the team can be safely destroyed. */
989 gomp_barrier_wait (&team->barrier);
992 if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
994 struct gomp_work_share *ws = team->work_shares[0].next_alloc;
997 struct gomp_work_share *next_ws = ws->next_alloc;
998 free (ws);
999 ws = next_ws;
1001 while (ws != NULL);
1003 gomp_sem_destroy (&team->master_release);
1005 if (__builtin_expect (thr->ts.team != NULL, 0)
1006 || __builtin_expect (team->nthreads == 1, 0))
1007 free_team (team);
1008 else
1010 struct gomp_thread_pool *pool = thr->thread_pool;
1011 if (pool->last_team)
1012 free_team (pool->last_team);
1013 pool->last_team = team;
1014 gomp_release_thread_pool (pool);
1018 #ifdef LIBGOMP_USE_PTHREADS
1020 /* Constructors for this file. */
1022 static void __attribute__((constructor))
1023 initialize_team (void)
1025 #if !defined HAVE_TLS && !defined USE_EMUTLS
1026 static struct gomp_thread initial_thread_tls_data;
1028 pthread_key_create (&gomp_tls_key, NULL);
1029 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
1030 #endif
1032 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
1033 gomp_fatal ("could not create thread pool destructor.");
1036 static void __attribute__((destructor))
1037 team_destructor (void)
1039 /* Without this dlclose on libgomp could lead to subsequent
1040 crashes. */
1041 pthread_key_delete (gomp_thread_destructor);
1044 /* Similar to gomp_free_pool_helper, but don't detach itself,
1045 gomp_pause_host will pthread_join those threads. */
1047 static void
1048 gomp_pause_pool_helper (void *thread_pool)
1050 struct gomp_thread *thr = gomp_thread ();
1051 struct gomp_thread_pool *pool
1052 = (struct gomp_thread_pool *) thread_pool;
1053 gomp_simple_barrier_wait_last (&pool->threads_dock);
1054 gomp_sem_destroy (&thr->release);
1055 thr->thread_pool = NULL;
1056 thr->task = NULL;
1057 pthread_exit (NULL);
1060 /* Free a thread pool and release its threads. Return non-zero on
1061 failure. */
1064 gomp_pause_host (void)
1066 struct gomp_thread *thr = gomp_thread ();
1067 struct gomp_thread_pool *pool = thr->thread_pool;
1068 if (thr->ts.level)
1069 return -1;
1070 if (pool)
1072 if (pool->threads_used > 0)
1074 int i;
1075 pthread_t *thrs
1076 = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
1077 for (i = 1; i < pool->threads_used; i++)
1079 struct gomp_thread *nthr = pool->threads[i];
1080 nthr->fn = gomp_pause_pool_helper;
1081 nthr->data = pool;
1082 thrs[i] = gomp_thread_to_pthread_t (nthr);
1084 /* This barrier undocks threads docked on pool->threads_dock. */
1085 gomp_simple_barrier_wait (&pool->threads_dock);
1086 /* And this waits till all threads have called gomp_barrier_wait_last
1087 in gomp_pause_pool_helper. */
1088 gomp_simple_barrier_wait (&pool->threads_dock);
1089 /* Now it is safe to destroy the barrier and free the pool. */
1090 gomp_simple_barrier_destroy (&pool->threads_dock);
1092 #ifdef HAVE_SYNC_BUILTINS
1093 __sync_fetch_and_add (&gomp_managed_threads,
1094 1L - pool->threads_used);
1095 #else
1096 gomp_mutex_lock (&gomp_managed_threads_lock);
1097 gomp_managed_threads -= pool->threads_used - 1L;
1098 gomp_mutex_unlock (&gomp_managed_threads_lock);
1099 #endif
1100 for (i = 1; i < pool->threads_used; i++)
1101 pthread_join (thrs[i], NULL);
1103 if (pool->last_team)
1104 free_team (pool->last_team);
1105 #ifndef __nvptx__
1106 team_free (pool->threads);
1107 team_free (pool);
1108 #endif
1109 thr->thread_pool = NULL;
1111 return 0;
1113 #endif
1115 struct gomp_task_icv *
1116 gomp_new_icv (void)
1118 struct gomp_thread *thr = gomp_thread ();
1119 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
1120 gomp_init_task (task, NULL, &gomp_global_icv);
1121 thr->task = task;
1122 #ifdef LIBGOMP_USE_PTHREADS
1123 pthread_setspecific (gomp_thread_destructor, thr);
1124 #endif
1125 return &task->icv;