[gomp] Thread pool management
[official-gcc.git] / libgomp / team.c
/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */
#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>
/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};
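
/* Note: gomp_team_start below allocates an array of these on its own
   stack (via gomp_alloca), and each newly created thread copies the
   fields it needs at the top of gomp_thread_start before waiting on
   the team barrier or the thread dock.  The master does not return
   from gomp_team_start until that barrier is passed, so the stack
   storage remains valid for as long as the new threads read it.  */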
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
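
/* If the calling thread is not already part of a team, return the team
   cached in its thread pool (pool->last_team), provided that it was built
   for the same number of threads; otherwise return NULL and let the
   caller allocate a fresh team.  */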
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
	{
	  pool->last_team = NULL;
	  return last_team;
	}
    }
  return NULL;
}
/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}
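
/* Helper executed by each docked worker thread when its pool is torn
   down.  gomp_free_thread below points thr->fn at this function and
   releases the dock; every worker then signals the final barrier,
   cleans up its per-thread state and exits.  */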
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}
/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      free (pool->threads);
      if (pool->last_team)
	free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REM threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;
  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));
	}
    }
  /* Release existing idle threads.  */
  for (; i < n; ++i)
    {
      unsigned int place_partition_off = thr->ts.place_partition_off;
      unsigned int place_partition_len = thr->ts.place_partition_len;
      unsigned int place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  place_partition_off = p;
		  if (p < rest)
		    place_partition_len = s + 1;
		  else
		    place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  place_partition_off = p;
		  place_partition_len = 1;
		}
	      break;
	    }
	  if (affinity_thr != NULL
	      || (bind != omp_proc_bind_true
		  && pool->threads[i]->place != p + 1)
	      || pool->threads[i]->place <= place_partition_off
	      || pool->threads[i]->place > (place_partition_off
					    + place_partition_len))
	    {
	      unsigned int l;
	      if (affinity_thr == NULL)
		{
		  unsigned int j;

		  if (team->prev_ts.place_partition_len > 64)
		    affinity_thr
		      = gomp_malloc (team->prev_ts.place_partition_len
				     * sizeof (struct gomp_thread *));
		  else
		    affinity_thr
		      = gomp_alloca (team->prev_ts.place_partition_len
				     * sizeof (struct gomp_thread *));
		  memset (affinity_thr, '\0',
			  team->prev_ts.place_partition_len
			  * sizeof (struct gomp_thread *));
		  for (j = i; j < old_threads_used; j++)
		    {
		      if (pool->threads[j]->place
			  > team->prev_ts.place_partition_off
			  && (pool->threads[j]->place
			      <= (team->prev_ts.place_partition_off
				  + team->prev_ts.place_partition_len)))
			{
			  l = pool->threads[j]->place - 1
			      - team->prev_ts.place_partition_off;
			  pool->threads[j]->data = affinity_thr[l];
			  affinity_thr[l] = pool->threads[j];
			}
		      pool->threads[j] = NULL;
		    }
		  if (nthreads > old_threads_used)
		    memset (&pool->threads[old_threads_used],
			    '\0', ((nthreads - old_threads_used)
				   * sizeof (struct gomp_thread *)));
		  n = nthreads;
		  affinity_count = old_threads_used - i;
		}
	      if (affinity_count == 0)
		break;
	      l = p;
	      if (affinity_thr[l - team->prev_ts.place_partition_off]
		  == NULL)
		{
		  if (bind != omp_proc_bind_true)
		    continue;
		  for (l = place_partition_off;
		       l < place_partition_off + place_partition_len;
		       l++)
		    if (affinity_thr[l - team->prev_ts.place_partition_off]
			!= NULL)
		      break;
		  if (l == place_partition_off + place_partition_len)
		    continue;
		}
	      nthr = affinity_thr[l - team->prev_ts.place_partition_off];
	      affinity_thr[l - team->prev_ts.place_partition_off]
		= (struct gomp_thread *) nthr->data;
	      affinity_count--;
	      pool->threads[i] = nthr;
	    }
	  else
	    nthr = pool->threads[i];
	  place = p + 1;
	}
      else
	nthr = pool->threads[i];
      nthr->ts.team = team;
      nthr->ts.work_share = &team->work_shares[0];
      nthr->ts.last_work_share = NULL;
      nthr->ts.team_id = i;
      nthr->ts.level = team->prev_ts.level + 1;
      nthr->ts.active_level = thr->ts.active_level;
      nthr->ts.place_partition_off = place_partition_off;
      nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
      nthr->ts.single_count = 0;
#endif
      nthr->ts.static_trip = 0;
      nthr->task = &team->implicit_task[i];
      nthr->place = place;
      gomp_init_task (nthr->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      nthr->fn = fn;
      nthr->data = data;
      team->ordered_release[i] = &nthr->release;
    }
  if (__builtin_expect (affinity_thr != NULL, 0))
    {
      /* If AFFINITY_THR is non-NULL just because we had to
	 permute some threads in the pool, but we've managed
	 to find exactly as many old threads as we'd find
	 without affinity, we don't need to handle this
	 specially anymore.  */
      if (nthreads <= old_threads_used
	  ? (affinity_count == old_threads_used - nthreads)
	  : (i == old_threads_used))
	{
	  if (team->prev_ts.place_partition_len > 64)
	    free (affinity_thr);
	  affinity_thr = NULL;
	  affinity_count = 0;
	}
      else
	{
	  i = 1;
	  /* We are going to compute the places/subpartitions
	     again from the beginning.  So, we need to reinitialize
	     vars modified by the switch (bind) above inside
	     of the loop, to the state they had after the initial
	     switch (bind).  */
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (nthreads > thr->ts.place_partition_len)
		/* T > P.  S has been changed, so needs
		   to be recomputed.  */
		s = nthreads / thr->ts.place_partition_len;
	      k = 1;
	      p = thr->place - 1;
	      break;
	    case omp_proc_bind_master:
	      /* No vars have been changed.  */
	      break;
	    case omp_proc_bind_spread:
	      p = thr->ts.place_partition_off;
	      if (k != 0)
		{
		  /* T > P.  */
		  s = nthreads / team->prev_ts.place_partition_len;
		  k = 1;
		}
	      break;
	    }

	  /* Increase the barrier threshold to make sure all new
	     threads and all the threads we're going to let die
	     arrive before the team is released.  */
	  if (affinity_count)
	    gomp_barrier_reinit (&pool->threads_dock,
				 nthreads + affinity_count);
	}
    }

  if (i == nthreads)
    goto do_release;
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads - i));
  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);
 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
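
/* The entry points above are not called directly by user code; a caller
   such as GOMP_parallel in parallel.c is expected to drive them roughly
   as in the sketch below.  This is a simplified illustration, not the
   exact upstream implementation: the example_parallel name is made up,
   and gomp_resolve_num_threads and GOMP_parallel_end live in other
   libgomp source files.  */
#if 0
static void
example_parallel (void (*fn) (void *), void *data, unsigned num_threads,
		  unsigned int flags)
{
  /* Resolve the requested thread count against the current ICVs.  */
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  /* Build (or reuse) a team and start num_threads - 1 helper threads.  */
  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads));
  /* The master runs the parallel region body itself...  */
  fn (data);
  /* ...and then joins the others and tears the team down.  */
  GOMP_parallel_end ();
}
#endif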
/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
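
/* Set up a task carrying the global ICVs for a thread that libgomp did
   not create (for example the initial program thread), and register the
   gomp_thread_destructor key so gomp_free_thread runs when the thread
   exits.  */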
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}