/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */
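
/* For orientation: this file is driven by the parallel-region entry points
   in parallel.c.  A caller such as GOMP_parallel uses it roughly as in the
   following simplified sketch (not the exact library code;
   gomp_resolve_num_threads lives in parallel.c):

     void
     GOMP_parallel (void (*fn) (void *), void *data,
                    unsigned num_threads, unsigned int flags)
     {
       num_threads = gomp_resolve_num_threads (num_threads, 0);
       gomp_team_start (fn, data, num_threads, flags,
                        gomp_new_team (num_threads));
       fn (data);              // the master runs its share of the region
       GOMP_parallel_end ();   // which in turn calls gomp_team_end ()
     }

   gomp_team_start releases or creates the worker threads, each of which
   runs FN from gomp_thread_start below.  */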

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */
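/* Note on lifetime: instances of this structure are carved out of a
   gomp_alloca'd array in gomp_team_start and are only read by the new
   thread before it reaches its first barrier; the creating thread does not
   return from gomp_team_start until that barrier has completed, so the
   stack allocation stays valid long enough.  */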

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
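
/* Return the previously cached team from the current thread's pool, but
   only if the calling thread is not already inside a team and the cached
   team was sized for exactly NTHREADS threads; otherwise return NULL and
   let the caller allocate a fresh team.  */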
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool != NULL)
        {
          struct gomp_team *last_team = pool->last_team;
          if (last_team != NULL && last_team->nthreads == nthreads)
            {
              pool->last_team = NULL;
              return last_team;
            }
        }
    }
  return NULL;
}

/* Create a new team data structure.  */
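/* The team is allocated with trailing space for NTHREADS implicit tasks
   followed by NTHREADS ordered_release semaphore pointers; ordered_release
   is pointed just past implicit_task[nthreads] below.  The first of the
   eight embedded work shares is initialized for immediate use and the
   remaining seven are chained onto the allocation free list (further work
   shares, if ever needed, are presumably allocated in chunks of
   work_share_chunk by work.c).  */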

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool.  */

static struct gomp_thread_pool *gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}
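
/* Helper run by each docked worker when the pool is being torn down: the
   worker answers the dock barrier one last time, cleans up its per-thread
   state and exits.  gomp_free_thread below arranges for the workers to run
   this by storing it into their fn slots before releasing the dock.  */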
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  */
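/* This is registered as the destructor for the gomp_thread_destructor
   pthread key in initialize_team below, so it runs when a thread with
   associated libgomp state exits.  */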

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits until all threads have called
             gomp_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */
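/* More precisely: install TEAM as the current team of the calling (master)
   thread, then provide NTHREADS - 1 workers for it.  For a non-nested
   region, idle threads docked on the pool's threads_dock barrier are reused
   first (re-binding them to places as dictated by the proc_bind policy from
   FLAGS or the ICVs) and only the shortfall is created with pthread_create;
   nested regions always get freshly created threads.  The barrier wait at
   do_release is what actually starts the team.  */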

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
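      /* Illustrative example (not taken from the original sources): with
         proc_bind(spread), NTHREADS == 2, and a partition of 8 places
         starting at offset 0 with the master on place 1, the code below
         computes s = 4 and rest = 0, so the master keeps subpartition
         [0, 4) and stays on place 1, while the launch loop further down
         gives the second thread subpartition [4, 8) bound to place 5.  */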
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REM threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread_data *));
        }

      /* Release existing idle threads.  */
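      /* When a place list is in use, each reused thread should end up on
         the place just computed in P.  Threads whose current place does
         not match are parked in AFFINITY_THR, an array indexed by place
         number relative to the previous partition, with threads sharing a
         place chained through their data fields; later iterations then
         pull a thread out of the bucket for the wanted place (or, for
         omp_proc_bind_true, any place of the subpartition) instead of
         using pool->threads[i] directly.  */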
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_barrier_reinit (&pool->threads_dock,
                                     nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
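  /* When a place list is in use, work on a private copy of the thread
     attributes so that gomp_init_thread_affinity can be given a per-thread
     CPU affinity for each computed place without touching the global
     gomp_thread_attr.  */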
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
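  /* Releasing the dock (or, for a nested team, the team barrier) below is
     what actually starts the workers: reused threads wake up in the idle
     loop of gomp_thread_start, pick up the fn/data stored above and run
     them, while freshly created threads pass this same barrier before
     calling fn.  */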
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */
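/* On return the master's team state has been restored from team->prev_ts
   and the team object has either been freed or cached in the pool's
   last_team slot so that get_last_team can recycle it for the next
   parallel region of the same size.  */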

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
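
/* Create a fresh task carrying the global ICVs for the calling thread,
   make it the thread's current task and register the thread destructor;
   returns a pointer to the new task's ICV block.  */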
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}