Simplify creation of target_clones (PR lto/66295)
[official-gcc.git] / libgomp / team.c
blob676614ae5d011a73841ebe0b244ff535d0112bb6
1 /* Copyright (C) 2005-2017 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of threads in response to team
27 creation and termination. */
29 #include "libgomp.h"
30 #include "pool.h"
31 #include <stdlib.h>
32 #include <string.h>
34 #ifdef LIBGOMP_USE_PTHREADS
35 /* This attribute contains PTHREAD_CREATE_DETACHED. It is the default
   attribute set gomp_team_start uses when creating threads. */
36 pthread_attr_t gomp_thread_attr;
38 /* This key is for the thread destructor. initialize_team creates it with
   gomp_free_thread as the destructor, and gomp_new_icv gives it a value
   for the calling thread. */
39 pthread_key_t gomp_thread_destructor;
42 /* This is the libgomp per-thread data structure: a __thread object when
   HAVE_TLS or USE_EMUTLS is defined, otherwise it is reached through the
   pthread key below. */
43 #if defined HAVE_TLS || defined USE_EMUTLS
44 __thread struct gomp_thread gomp_tls_data;
45 #else
46 pthread_key_t gomp_tls_key;
47 #endif
50 /* This structure is used to communicate across pthread_create.
   gomp_team_start fills one record per thread it creates and
   gomp_thread_start copies the fields into the new thread's state. */
52 struct gomp_thread_start_data
/* Function the new thread runs first, and its argument. */
54 void (*fn) (void *);
55 void *fn_data;
/* Initial team state, copied into thr->ts. */
56 struct gomp_team_state ts;
/* Implicit task for the new thread, copied into thr->task. */
57 struct gomp_task *task;
/* Pool the new thread belongs to, copied into thr->thread_pool. */
58 struct gomp_thread_pool *thread_pool;
/* Copied into thr->place; gomp_team_start stores 0 or P + 1 here. */
59 unsigned int place;
/* True when the thread is created for a nested team: it then runs FN
   once and exits instead of docking in the pool. */
60 bool nested;
64 /* This function is a pthread_create entry point. This contains the idle
65 loop in which a thread waits to be called up to become part of a team.
   XDATA points to the struct gomp_thread_start_data prepared by the
   creating thread. Always returns NULL. */
67 static void *
68 gomp_thread_start (void *xdata)
70 struct gomp_thread_start_data *data = xdata;
71 struct gomp_thread *thr;
72 struct gomp_thread_pool *pool;
73 void (*local_fn) (void *);
74 void *local_data;
/* Locate this thread's gomp_thread: the TLS object when available,
   otherwise a local of this function registered under gomp_tls_key. */
76 #if defined HAVE_TLS || defined USE_EMUTLS
77 thr = &gomp_tls_data;
78 #else
79 struct gomp_thread local_thr;
80 thr = &local_thr;
81 pthread_setspecific (gomp_tls_key, thr);
82 #endif
83 gomp_sem_init (&thr->release, 0);
85 /* Extract what we need from data. */
86 local_fn = data->fn;
87 local_data = data->fn_data;
88 thr->thread_pool = data->thread_pool;
89 thr->ts = data->ts;
90 thr->task = data->task;
91 thr->place = data->place;
/* Publish this thread's release semaphore in its team slot. */
93 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
95 /* Make thread pool local. */
96 pool = thr->thread_pool;
/* Nested case: wait at the team barrier, run the function once, finish
   the task and fall through to the cleanup below. */
98 if (data->nested)
100 struct gomp_team *team = thr->ts.team;
101 struct gomp_task *task = thr->task;
103 gomp_barrier_wait (&team->barrier);
105 local_fn (local_data);
106 gomp_team_barrier_wait_final (&team->barrier);
107 gomp_finish_task (task);
108 gomp_barrier_wait_last (&team->barrier);
/* Non-nested case: register in the pool, dock, then loop running
   whatever function is handed over through thr->fn / thr->data. */
110 else
112 pool->threads[thr->ts.team_id] = thr;
114 gomp_simple_barrier_wait (&pool->threads_dock);
117 struct gomp_team *team = thr->ts.team;
118 struct gomp_task *task = thr->task;
120 local_fn (local_data);
121 gomp_team_barrier_wait_final (&team->barrier);
122 gomp_finish_task (task);
/* Dock again until the next release of pool->threads_dock. */
124 gomp_simple_barrier_wait (&pool->threads_dock);
/* Pick up the next assignment and clear thr->fn; when no function was
   stored, local_fn is NULL and the loop ends. */
126 local_fn = thr->fn;
127 local_data = thr->data;
128 thr->fn = NULL;
130 while (local_fn);
133 gomp_sem_destroy (&thr->release);
134 thr->thread_pool = NULL;
135 thr->task = NULL;
136 return NULL;
138 #endif
140 static inline struct gomp_team *
141 get_last_team (unsigned nthreads)
143 struct gomp_thread *thr = gomp_thread ();
144 if (thr->ts.team == NULL)
146 struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
147 struct gomp_team *last_team = pool->last_team;
148 if (last_team != NULL && last_team->nthreads == nthreads)
150 pool->last_team = NULL;
151 return last_team;
154 return NULL;
157 /* Create a new team data structure. */
159 struct gomp_team *
160 gomp_new_team (unsigned nthreads)
162 struct gomp_team *team;
163 int i;
165 team = get_last_team (nthreads);
166 if (team == NULL)
168 size_t extra = sizeof (team->ordered_release[0])
169 + sizeof (team->implicit_task[0]);
170 team = gomp_malloc (sizeof (*team) + nthreads * extra);
172 #ifndef HAVE_SYNC_BUILTINS
173 gomp_mutex_init (&team->work_share_list_free_lock);
174 #endif
175 gomp_barrier_init (&team->barrier, nthreads);
176 gomp_mutex_init (&team->task_lock);
178 team->nthreads = nthreads;
181 team->work_share_chunk = 8;
182 #ifdef HAVE_SYNC_BUILTINS
183 team->single_count = 0;
184 #endif
185 team->work_shares_to_free = &team->work_shares[0];
186 gomp_init_work_share (&team->work_shares[0], false, nthreads);
187 team->work_shares[0].next_alloc = NULL;
188 team->work_share_list_free = NULL;
189 team->work_share_list_alloc = &team->work_shares[1];
190 for (i = 1; i < 7; i++)
191 team->work_shares[i].next_free = &team->work_shares[i + 1];
192 team->work_shares[i].next_free = NULL;
194 gomp_sem_init (&team->master_release, 0);
195 team->ordered_release = (void *) &team->implicit_task[nthreads];
196 team->ordered_release[0] = &team->master_release;
198 priority_queue_init (&team->task_queue);
199 team->task_count = 0;
200 team->task_queued_count = 0;
201 team->task_running_count = 0;
202 team->work_share_cancelled = 0;
203 team->team_cancelled = 0;
205 return team;
209 /* Free a team data structure. */
211 static void
212 free_team (struct gomp_team *team)
214 #ifndef HAVE_SYNC_BUILTINS
215 gomp_mutex_destroy (&team->work_share_list_free_lock);
216 #endif
217 gomp_barrier_destroy (&team->barrier);
218 gomp_mutex_destroy (&team->task_lock);
219 priority_queue_free (&team->task_queue);
220 free (team);
/* Function handed to docked pool threads by gomp_free_thread. THREAD_POOL
   is the pool being freed. The calling thread checks in on the pool's
   dock barrier one last time, drops its per-thread state and terminates;
   this function does not return. */
223 static void
224 gomp_free_pool_helper (void *thread_pool)
226 struct gomp_thread *thr = gomp_thread ();
227 struct gomp_thread_pool *pool
228 = (struct gomp_thread_pool *) thread_pool;
229 gomp_simple_barrier_wait_last (&pool->threads_dock);
230 gomp_sem_destroy (&thr->release);
231 thr->thread_pool = NULL;
232 thr->task = NULL;
/* Terminate the thread in a target-specific way; other configurations
   are rejected at compile time. */
233 #ifdef LIBGOMP_USE_PTHREADS
234 pthread_exit (NULL);
235 #elif defined(__nvptx__)
236 asm ("exit;");
237 #else
238 #error gomp_free_pool_helper must terminate the thread
239 #endif
242 /* Free a thread pool and release its threads. */
244 void
245 gomp_free_thread (void *arg __attribute__((unused)))
247 struct gomp_thread *thr = gomp_thread ();
248 struct gomp_thread_pool *pool = thr->thread_pool;
249 if (pool)
251 if (pool->threads_used > 0)
253 int i;
254 for (i = 1; i < pool->threads_used; i++)
256 struct gomp_thread *nthr = pool->threads[i];
257 nthr->fn = gomp_free_pool_helper;
258 nthr->data = pool;
260 /* This barrier undocks threads docked on pool->threads_dock. */
261 gomp_simple_barrier_wait (&pool->threads_dock);
262 /* And this waits till all threads have called gomp_barrier_wait_last
263 in gomp_free_pool_helper. */
264 gomp_simple_barrier_wait (&pool->threads_dock);
265 /* Now it is safe to destroy the barrier and free the pool. */
266 gomp_simple_barrier_destroy (&pool->threads_dock);
268 #ifdef HAVE_SYNC_BUILTINS
269 __sync_fetch_and_add (&gomp_managed_threads,
270 1L - pool->threads_used);
271 #else
272 gomp_mutex_lock (&gomp_managed_threads_lock);
273 gomp_managed_threads -= pool->threads_used - 1L;
274 gomp_mutex_unlock (&gomp_managed_threads_lock);
275 #endif
277 if (pool->last_team)
278 free_team (pool->last_team);
279 #ifndef __nvptx__
280 free (pool->threads);
281 free (pool);
282 #endif
283 thr->thread_pool = NULL;
285 if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
286 gomp_team_end ();
287 if (thr->task != NULL)
289 struct gomp_task *task = thr->task;
290 gomp_end_task ();
291 free (task);
295 /* Launch a team.

   FN and DATA are the function and argument handed to every other team
   member (this function does not call FN on the calling thread itself).
   NTHREADS is the team size, the low three bits of FLAGS carry a
   proc_bind request, and TEAM is the team structure to run with. The
   calling thread becomes team member 0 and its previous team state is
   saved in TEAM->prev_ts. For a non-nested team, existing pool threads
   are reused first and only the missing ones are created; finally all
   members are released through the team barrier (nested) or the pool
   dock (non-nested). */
297 #ifdef LIBGOMP_USE_PTHREADS
298 void
299 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
300 unsigned flags, struct gomp_team *team)
302 struct gomp_thread_start_data *start_data;
303 struct gomp_thread *thr, *nthr;
304 struct gomp_task *task;
305 struct gomp_task_icv *icv;
306 bool nested;
307 struct gomp_thread_pool *pool;
308 unsigned i, n, old_threads_used = 0;
309 pthread_attr_t thread_attr, *attr;
310 unsigned long nthreads_var;
311 char bind, bind_var;
312 unsigned int s = 0, rest = 0, p = 0, k = 0;
313 unsigned int affinity_count = 0;
314 struct gomp_thread **affinity_thr = NULL;
316 thr = gomp_thread ();
/* A non-zero level on entry means the new team is nested. */
317 nested = thr->ts.level;
318 pool = thr->thread_pool;
319 task = thr->task;
320 icv = task ? &task->icv : &gomp_global_icv;
321 if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
322 gomp_init_affinity ();
324 /* Always save the previous state, even if this isn't a nested team.
325 In particular, we should save any work share state from an outer
326 orphaned work share construct. */
327 team->prev_ts = thr->ts;
/* Make the calling thread member 0 of the new team. */
329 thr->ts.team = team;
330 thr->ts.team_id = 0;
331 ++thr->ts.level;
332 if (nthreads > 1)
333 ++thr->ts.active_level;
334 thr->ts.work_share = &team->work_shares[0];
335 thr->ts.last_work_share = NULL;
336 #ifdef HAVE_SYNC_BUILTINS
337 thr->ts.single_count = 0;
338 #endif
339 thr->ts.static_trip = 0;
340 thr->task = &team->implicit_task[0];
/* Work out the nthreads-var and bind-var ICVs for the new implicit
   tasks; per-level lists override the inherited values. */
341 nthreads_var = icv->nthreads_var;
342 if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
343 && thr->ts.level < gomp_nthreads_var_list_len)
344 nthreads_var = gomp_nthreads_var_list[thr->ts.level];
345 bind_var = icv->bind_var;
346 if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
347 bind_var = flags & 7;
/* BIND is the policy used for this team; BIND_VAR is what the team's
   implicit tasks get as their ICV. */
348 bind = bind_var;
349 if (__builtin_expect (gomp_bind_var_list != NULL, 0)
350 && thr->ts.level < gomp_bind_var_list_len)
351 bind_var = gomp_bind_var_list[thr->ts.level];
352 gomp_init_task (thr->task, task, icv);
353 team->implicit_task[0].icv.nthreads_var = nthreads_var;
354 team->implicit_task[0].icv.bind_var = bind_var;
/* A team of one needs no other threads. */
356 if (nthreads == 1)
357 return;
/* I is the team id of the next member to set up; 0 is the caller. */
359 i = 1;
361 if (__builtin_expect (gomp_places_list != NULL, 0))
363 /* Depending on chosen proc_bind model, set subpartition
364 for the master thread and initialize helper variables
365 P and optionally S, K and/or REST used by later place
366 computation for each additional thread. */
367 p = thr->place - 1;
368 switch (bind)
370 case omp_proc_bind_true:
371 case omp_proc_bind_close:
372 if (nthreads > thr->ts.place_partition_len)
374 /* T > P. S threads will be placed in each place,
375 and the final REST threads placed one by one
376 into the already occupied places. */
377 s = nthreads / thr->ts.place_partition_len;
378 rest = nthreads % thr->ts.place_partition_len;
380 else
381 s = 1;
382 k = 1;
383 break;
384 case omp_proc_bind_master:
385 /* Each thread will be bound to master's place. */
386 break;
387 case omp_proc_bind_spread:
388 if (nthreads <= thr->ts.place_partition_len)
390 /* T <= P. Each subpartition will have in between s
391 and s+1 places (subpartitions starting at or
392 after rest will have s places, earlier s+1 places),
393 each thread will be bound to the first place in
394 its subpartition (except for the master thread
395 that can be bound to another place in its
396 subpartition). */
397 s = thr->ts.place_partition_len / nthreads;
398 rest = thr->ts.place_partition_len % nthreads;
399 rest = (s + 1) * rest + thr->ts.place_partition_off;
400 if (p < rest)
402 p -= (p - thr->ts.place_partition_off) % (s + 1);
403 thr->ts.place_partition_len = s + 1;
405 else
407 p -= (p - rest) % s;
408 thr->ts.place_partition_len = s;
410 thr->ts.place_partition_off = p;
412 else
414 /* T > P. Each subpartition will have just a single
415 place and we'll place between s and s+1
416 threads into each subpartition. */
417 s = nthreads / thr->ts.place_partition_len;
418 rest = nthreads % thr->ts.place_partition_len;
419 thr->ts.place_partition_off = p;
420 thr->ts.place_partition_len = 1;
421 k = 1;
423 break;
426 else
427 bind = omp_proc_bind_false;
429 /* We only allow the reuse of idle threads for non-nested PARALLEL
430 regions. This appears to be implied by the semantics of
431 threadprivate variables, but perhaps that's reading too much into
432 things. Certainly it does prevent any locking problems, since
433 only the initial program thread will modify gomp_threads. */
434 if (!nested)
436 old_threads_used = pool->threads_used;
/* N is the number of team slots that can be filled from the pool. */
438 if (nthreads <= old_threads_used)
439 n = nthreads;
440 else if (old_threads_used == 0)
442 n = 0;
443 gomp_simple_barrier_init (&pool->threads_dock, nthreads);
445 else
447 n = old_threads_used;
449 /* Increase the barrier threshold to make sure all new
450 threads arrive before the team is released. */
451 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
454 /* Not true yet, but soon will be. We're going to release all
455 threads from the dock, and those that aren't part of the
456 team will exit. */
457 pool->threads_used = nthreads;
459 /* If necessary, expand the size of the pool->threads array. It is
460 expected that changes in the number of threads are rare, thus we
461 make no effort to expand pool->threads_size geometrically. */
462 if (nthreads >= pool->threads_size)
464 pool->threads_size = nthreads + 1;
/* NOTE(review): struct gomp_thread_data is not used anywhere else in
   this file; the elements of pool->threads are handled as
   struct gomp_thread *. Only the size of a pointer is taken here, so
   this looks harmless, but confirm the intended type. */
465 pool->threads
466 = gomp_realloc (pool->threads,
467 pool->threads_size
468 * sizeof (struct gomp_thread_data *));
471 /* Release existing idle threads. */
472 for (; i < n; ++i)
474 unsigned int place_partition_off = thr->ts.place_partition_off;
475 unsigned int place_partition_len = thr->ts.place_partition_len;
476 unsigned int place = 0;
477 if (__builtin_expect (gomp_places_list != NULL, 0))
/* Advance P (and S/K) to the place for team member I. */
479 switch (bind)
481 case omp_proc_bind_true:
482 case omp_proc_bind_close:
483 if (k == s)
485 ++p;
486 if (p == (team->prev_ts.place_partition_off
487 + team->prev_ts.place_partition_len))
488 p = team->prev_ts.place_partition_off;
489 k = 1;
490 if (i == nthreads - rest)
491 s = 1;
493 else
494 ++k;
495 break;
496 case omp_proc_bind_master:
497 break;
498 case omp_proc_bind_spread:
499 if (k == 0)
501 /* T <= P. */
502 if (p < rest)
503 p += s + 1;
504 else
505 p += s;
506 if (p == (team->prev_ts.place_partition_off
507 + team->prev_ts.place_partition_len))
508 p = team->prev_ts.place_partition_off;
509 place_partition_off = p;
510 if (p < rest)
511 place_partition_len = s + 1;
512 else
513 place_partition_len = s;
515 else
517 /* T > P. */
518 if (k == s)
520 ++p;
521 if (p == (team->prev_ts.place_partition_off
522 + team->prev_ts.place_partition_len))
523 p = team->prev_ts.place_partition_off;
524 k = 1;
525 if (i == nthreads - rest)
526 s = 1;
528 else
529 ++k;
530 place_partition_off = p;
531 place_partition_len = 1;
533 break;
/* The pool thread in slot I cannot be used as is when threads are
   already being permuted or when its place does not fit the place or
   subpartition just computed. */
535 if (affinity_thr != NULL
536 || (bind != omp_proc_bind_true
537 && pool->threads[i]->place != p + 1)
538 || pool->threads[i]->place <= place_partition_off
539 || pool->threads[i]->place > (place_partition_off
540 + place_partition_len))
542 unsigned int l;
543 if (affinity_thr == NULL)
545 unsigned int j;
/* First mismatch: build AFFINITY_THR, one list of old pool threads per
   place of the previous partition, chained through each thread's data
   field. Large tables come from the heap, small ones from the stack. */
547 if (team->prev_ts.place_partition_len > 64)
548 affinity_thr
549 = gomp_malloc (team->prev_ts.place_partition_len
550 * sizeof (struct gomp_thread *));
551 else
552 affinity_thr
553 = gomp_alloca (team->prev_ts.place_partition_len
554 * sizeof (struct gomp_thread *));
555 memset (affinity_thr, '\0',
556 team->prev_ts.place_partition_len
557 * sizeof (struct gomp_thread *));
558 for (j = i; j < old_threads_used; j++)
560 if (pool->threads[j]->place
561 > team->prev_ts.place_partition_off
562 && (pool->threads[j]->place
563 <= (team->prev_ts.place_partition_off
564 + team->prev_ts.place_partition_len)))
566 l = pool->threads[j]->place - 1
567 - team->prev_ts.place_partition_off;
568 pool->threads[j]->data = affinity_thr[l];
569 affinity_thr[l] = pool->threads[j];
571 pool->threads[j] = NULL;
573 if (nthreads > old_threads_used)
574 memset (&pool->threads[old_threads_used],
575 '\0', ((nthreads - old_threads_used)
576 * sizeof (struct gomp_thread *)));
577 n = nthreads;
578 affinity_count = old_threads_used - i;
580 if (affinity_count == 0)
581 break;
/* Look for an old thread already sitting on place P, or, for
   omp_proc_bind_true, anywhere in the subpartition. */
582 l = p;
583 if (affinity_thr[l - team->prev_ts.place_partition_off]
584 == NULL)
586 if (bind != omp_proc_bind_true)
587 continue;
588 for (l = place_partition_off;
589 l < place_partition_off + place_partition_len;
590 l++)
591 if (affinity_thr[l - team->prev_ts.place_partition_off]
592 != NULL)
593 break;
594 if (l == place_partition_off + place_partition_len)
595 continue;
/* Pop the thread found from its per-place list and give it slot I. */
597 nthr = affinity_thr[l - team->prev_ts.place_partition_off];
598 affinity_thr[l - team->prev_ts.place_partition_off]
599 = (struct gomp_thread *) nthr->data;
600 affinity_count--;
601 pool->threads[i] = nthr;
603 else
604 nthr = pool->threads[i];
605 place = p + 1;
607 else
608 nthr = pool->threads[i];
/* Set up the reused thread's team state, implicit task and work. */
609 nthr->ts.team = team;
610 nthr->ts.work_share = &team->work_shares[0];
611 nthr->ts.last_work_share = NULL;
612 nthr->ts.team_id = i;
613 nthr->ts.level = team->prev_ts.level + 1;
614 nthr->ts.active_level = thr->ts.active_level;
615 nthr->ts.place_partition_off = place_partition_off;
616 nthr->ts.place_partition_len = place_partition_len;
617 #ifdef HAVE_SYNC_BUILTINS
618 nthr->ts.single_count = 0;
619 #endif
620 nthr->ts.static_trip = 0;
621 nthr->task = &team->implicit_task[i];
622 nthr->place = place;
623 gomp_init_task (nthr->task, task, icv);
624 team->implicit_task[i].icv.nthreads_var = nthreads_var;
625 team->implicit_task[i].icv.bind_var = bind_var;
626 nthr->fn = fn;
627 nthr->data = data;
628 team->ordered_release[i] = &nthr->release;
631 if (__builtin_expect (affinity_thr != NULL, 0))
633 /* If AFFINITY_THR is non-NULL just because we had to
634 permute some threads in the pool, but we've managed
635 to find exactly as many old threads as we'd find
636 without affinity, we don't need to handle this
637 specially anymore. */
638 if (nthreads <= old_threads_used
639 ? (affinity_count == old_threads_used - nthreads)
640 : (i == old_threads_used))
642 if (team->prev_ts.place_partition_len > 64)
643 free (affinity_thr);
644 affinity_thr = NULL;
645 affinity_count = 0;
647 else
649 i = 1;
650 /* We are going to compute the places/subpartitions
651 again from the beginning. So, we need to reinitialize
652 vars modified by the switch (bind) above inside
653 of the loop, to the state they had after the initial
654 switch (bind). */
655 switch (bind)
657 case omp_proc_bind_true:
658 case omp_proc_bind_close:
659 if (nthreads > thr->ts.place_partition_len)
660 /* T > P. S has been changed, so needs
661 to be recomputed. */
662 s = nthreads / thr->ts.place_partition_len;
663 k = 1;
664 p = thr->place - 1;
665 break;
666 case omp_proc_bind_master:
667 /* No vars have been changed. */
668 break;
669 case omp_proc_bind_spread:
670 p = thr->ts.place_partition_off;
671 if (k != 0)
673 /* T > P. */
674 s = nthreads / team->prev_ts.place_partition_len;
675 k = 1;
677 break;
680 /* Increase the barrier threshold to make sure all new
681 threads and all the threads we're going to let die
682 arrive before the team is released. */
683 if (affinity_count)
684 gomp_simple_barrier_reinit (&pool->threads_dock,
685 nthreads + affinity_count);
/* Every member came from the pool: nothing to create. */
689 if (i == nthreads)
690 goto do_release;
/* Account for the threads about to be added in gomp_managed_threads. */
694 if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
696 long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
698 if (old_threads_used == 0)
699 --diff;
701 #ifdef HAVE_SYNC_BUILTINS
702 __sync_fetch_and_add (&gomp_managed_threads, diff);
703 #else
704 gomp_mutex_lock (&gomp_managed_threads_lock);
705 gomp_managed_threads += diff;
706 gomp_mutex_unlock (&gomp_managed_threads_lock);
707 #endif
/* With places in use, work on a private copy of the thread attributes
   (detached, same stack size) so affinity can be set per thread. */
710 attr = &gomp_thread_attr;
711 if (__builtin_expect (gomp_places_list != NULL, 0))
713 size_t stacksize;
714 pthread_attr_init (&thread_attr);
715 pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
716 if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
717 pthread_attr_setstacksize (&thread_attr, stacksize);
718 attr = &thread_attr;
/* Room for one start record per remaining team slot; each
   pthread_create below consumes the next record. */
721 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
722 * (nthreads-i));
724 /* Launch new threads. */
725 for (; i < nthreads; ++i)
727 pthread_t pt;
728 int err;
730 start_data->ts.place_partition_off = thr->ts.place_partition_off;
731 start_data->ts.place_partition_len = thr->ts.place_partition_len;
732 start_data->place = 0;
733 if (__builtin_expect (gomp_places_list != NULL, 0))
/* Same place computation as in the reuse loop above. */
735 switch (bind)
737 case omp_proc_bind_true:
738 case omp_proc_bind_close:
739 if (k == s)
741 ++p;
742 if (p == (team->prev_ts.place_partition_off
743 + team->prev_ts.place_partition_len))
744 p = team->prev_ts.place_partition_off;
745 k = 1;
746 if (i == nthreads - rest)
747 s = 1;
749 else
750 ++k;
751 break;
752 case omp_proc_bind_master:
753 break;
754 case omp_proc_bind_spread:
755 if (k == 0)
757 /* T <= P. */
758 if (p < rest)
759 p += s + 1;
760 else
761 p += s;
762 if (p == (team->prev_ts.place_partition_off
763 + team->prev_ts.place_partition_len))
764 p = team->prev_ts.place_partition_off;
765 start_data->ts.place_partition_off = p;
766 if (p < rest)
767 start_data->ts.place_partition_len = s + 1;
768 else
769 start_data->ts.place_partition_len = s;
771 else
773 /* T > P. */
774 if (k == s)
776 ++p;
777 if (p == (team->prev_ts.place_partition_off
778 + team->prev_ts.place_partition_len))
779 p = team->prev_ts.place_partition_off;
780 k = 1;
781 if (i == nthreads - rest)
782 s = 1;
784 else
785 ++k;
786 start_data->ts.place_partition_off = p;
787 start_data->ts.place_partition_len = 1;
789 break;
791 start_data->place = p + 1;
/* Slot I was already filled with a reused pool thread during the
   affinity pass; no new thread is needed for it. */
792 if (affinity_thr != NULL && pool->threads[i] != NULL)
793 continue;
794 gomp_init_thread_affinity (attr, p);
797 start_data->fn = fn;
798 start_data->fn_data = data;
799 start_data->ts.team = team;
800 start_data->ts.work_share = &team->work_shares[0];
801 start_data->ts.last_work_share = NULL;
802 start_data->ts.team_id = i;
803 start_data->ts.level = team->prev_ts.level + 1;
804 start_data->ts.active_level = thr->ts.active_level;
805 #ifdef HAVE_SYNC_BUILTINS
806 start_data->ts.single_count = 0;
807 #endif
808 start_data->ts.static_trip = 0;
809 start_data->task = &team->implicit_task[i];
810 gomp_init_task (start_data->task, task, icv);
811 team->implicit_task[i].icv.nthreads_var = nthreads_var;
812 team->implicit_task[i].icv.bind_var = bind_var;
813 start_data->thread_pool = pool;
814 start_data->nested = nested;
816 attr = gomp_adjust_thread_attr (attr, &thread_attr);
817 err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
818 if (err != 0)
819 gomp_fatal ("Thread creation failed: %s", strerror (err));
822 if (__builtin_expect (attr == &thread_attr, 0))
823 pthread_attr_destroy (&thread_attr);
/* Release the team members: nested teams meet at the team barrier,
   pool threads are undocked from pool->threads_dock. */
825 do_release:
826 if (nested)
827 gomp_barrier_wait (&team->barrier);
828 else
829 gomp_simple_barrier_wait (&pool->threads_dock);
831 /* Decrease the barrier threshold to match the number of threads
832 that should arrive back at the end of this team. The extra
833 threads should be exiting. Note that we arrange for this test
834 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
835 the barrier as well as gomp_managed_threads was temporarily
836 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_USED,
837 AFFINITY_COUNT if non-zero will be always at least
838 OLD_THREADS_USED - NTHREADS. */
839 if (__builtin_expect (nthreads < old_threads_used, 0)
840 || __builtin_expect (affinity_count, 0))
842 long diff = (long) nthreads - (long) old_threads_used;
844 if (affinity_count)
845 diff = -affinity_count;
847 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
849 #ifdef HAVE_SYNC_BUILTINS
850 __sync_fetch_and_add (&gomp_managed_threads, diff);
851 #else
852 gomp_mutex_lock (&gomp_managed_threads_lock);
853 gomp_managed_threads += diff;
854 gomp_mutex_unlock (&gomp_managed_threads_lock);
855 #endif
/* Only the heap-allocated table (more than 64 places) needs freeing. */
857 if (__builtin_expect (affinity_thr != NULL, 0)
858 && team->prev_ts.place_partition_len > 64)
859 free (affinity_thr);
861 #endif
864 /* Terminate the current team. This is only to be called by the master
865 thread. We assume that we must wait for the other threads.
   Restores the team state that gomp_team_start saved in TEAM->prev_ts,
   then either frees the team or caches it in the thread pool. */
867 void
868 gomp_team_end (void)
870 struct gomp_thread *thr = gomp_thread ();
871 struct gomp_team *team = thr->ts.team;
873 /* This barrier handles all pending explicit threads.
874 As #pragma omp cancel parallel might get awaited count in
875 team->barrier in an inconsistent state, we need to use a different
876 counter here. */
877 gomp_team_barrier_wait_final (&team->barrier);
/* After a cancelled team, walk the chain starting at
   work_shares_to_free and finalize every work share on it; otherwise
   only the current one needs finalizing. */
878 if (__builtin_expect (team->team_cancelled, 0))
880 struct gomp_work_share *ws = team->work_shares_to_free;
883 struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
884 if (next_ws == NULL)
885 gomp_ptrlock_set (&ws->next_ws, ws);
886 gomp_fini_work_share (ws);
887 ws = next_ws;
889 while (ws != NULL);
891 else
892 gomp_fini_work_share (thr->ts.work_share);
894 gomp_end_task ();
/* Back to the state the thread had before this team was started. */
895 thr->ts = team->prev_ts;
/* The restored state still has a team, i.e. the team being ended was
   nested inside another one. */
897 if (__builtin_expect (thr->ts.team != NULL, 0))
899 #ifdef HAVE_SYNC_BUILTINS
900 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
901 #else
902 gomp_mutex_lock (&gomp_managed_threads_lock);
903 gomp_managed_threads -= team->nthreads - 1L;
904 gomp_mutex_unlock (&gomp_managed_threads_lock);
905 #endif
906 /* This barrier has gomp_barrier_wait_last counterparts
907 and ensures the team can be safely destroyed. */
908 gomp_barrier_wait (&team->barrier);
/* Free the work shares chained through work_shares[0].next_alloc. */
911 if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
913 struct gomp_work_share *ws = team->work_shares[0].next_alloc;
916 struct gomp_work_share *next_ws = ws->next_alloc;
917 free (ws);
918 ws = next_ws;
920 while (ws != NULL);
922 gomp_sem_destroy (&team->master_release);
/* Nested and single-thread teams are freed right away; any other team
   replaces the pool's cached last_team, where get_last_team can find
   it again. */
924 if (__builtin_expect (thr->ts.team != NULL, 0)
925 || __builtin_expect (team->nthreads == 1, 0))
926 free_team (team);
927 else
929 struct gomp_thread_pool *pool = thr->thread_pool;
930 if (pool->last_team)
931 free_team (pool->last_team);
932 pool->last_team = team;
933 gomp_release_thread_pool (pool);
937 #ifdef LIBGOMP_USE_PTHREADS
939 /* Constructors for this file. */
941 static void __attribute__((constructor))
942 initialize_team (void)
944 #if !defined HAVE_TLS && !defined USE_EMUTLS
945 static struct gomp_thread initial_thread_tls_data;
947 pthread_key_create (&gomp_tls_key, NULL);
948 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
949 #endif
951 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
952 gomp_fatal ("could not create thread pool destructor.");
955 static void __attribute__((destructor))
956 team_destructor (void)
958 /* Without this dlclose on libgomp could lead to subsequent
959 crashes. */
960 pthread_key_delete (gomp_thread_destructor);
962 #endif
964 struct gomp_task_icv *
965 gomp_new_icv (void)
967 struct gomp_thread *thr = gomp_thread ();
968 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
969 gomp_init_task (task, NULL, &gomp_global_icv);
970 thr->task = task;
971 #ifdef LIBGOMP_USE_PTHREADS
972 pthread_setspecific (gomp_thread_destructor, thr);
973 #endif
974 return &task->icv;