libgomp/team.c
/* Copyright (C) 2005-2021 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */
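
/* For orientation: a parallel region is driven by a call sequence
   roughly like the following (a simplified sketch of GOMP_parallel
   in parallel.c):

     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, flags, team, NULL);
     fn (data);                the master runs its share of the region
     GOMP_parallel_end ();     which ends up in gomp_team_end ()  */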

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif

/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  unsigned int num_teams;
  unsigned int team_num;
  bool nested;
  pthread_t handle;
};

/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
  thr->num_teams = data->num_teams;
  thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
  pthread_setspecific (gomp_tls_key, thr);
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif
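
/* A just-finished team of the right size may be cached in
   pool->last_team; get_last_team hands it back for reuse instead of
   allocating a new one, but only when the calling thread is not
   itself inside a team.  */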

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = team_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
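  /* The team object embeds an array of 8 work shares
     (work_share_chunk above); slot 0 is initialized for immediate use
     and slots 1..7 are chained into the allocation free list below.  */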
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  team->task_detach_count = 0;

  return team;
}

/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}
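
/* Helper run by each docked pool thread during pool teardown: it is
   handed to the thread as its next fn, signals the master through the
   last barrier wait, and then terminates itself.  */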
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data = NULL;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;
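
  /* The master thread is team member 0; the remaining nthreads - 1
     members are recruited below, first by reusing idle threads docked
     in the pool and then, if needed, by creating new ones.  */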
  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REM threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;
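
  /* For example: with omp_proc_bind_close, nthreads == 8 and a
     4-place partition, the code above yields s == 2 and rest == 0,
     so the placement loops below put two team members in each place
     of the partition.  */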

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
          nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->num_teams = thr->num_teams;
          nthr->team_num = thr->team_num;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;
    }
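
  /* Any team members still missing at this point have to be created
     fresh: account for them in gomp_managed_threads and launch them
     with pthread_create below.  */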
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));
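
  /* One start-data slot per thread still to be launched; the block
     lives on the master's stack and each new thread copies what it
     needs out of it in gomp_thread_start.  */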

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->num_teams = thr->num_teams;
      start_data->team_num = thr->team_num;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);
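
  /* Releasing the dock (or, for nested teams, the team barrier) below
     is what actually starts the team: every waiting thread picks up
     its fn/data and begins executing.  */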
 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif

/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but doesn't detach itself;
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}

#endif

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}