2015-05-05 Yvan Roux <yvan.roux@linaro.org>
[official-gcc.git] / libgomp / team.c
blobb98b2337434cab0f3f196b75d6fb14cd2c46e722
1 /* Copyright (C) 2005-2015 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of threads in response to team
27 creation and termination. */
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
/* Default attribute object used when creating worker threads; it is
   expected to carry PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* Key whose destructor (gomp_free_thread, registered by
   initialize_team) releases a thread's pool and task.  */
pthread_key_t gomp_thread_destructor;


/* The libgomp per-thread data structure.  Without native or emulated
   TLS it is reached through a pthread key instead.  */
#if !defined HAVE_TLS && !defined USE_EMUTLS
pthread_key_t gomp_tls_key;
#else
__thread struct gomp_thread gomp_tls_data;
#endif
48 /* This structure is used to communicate across pthread_create. */
50 struct gomp_thread_start_data
52 void (*fn) (void *);
53 void *fn_data;
54 struct gomp_team_state ts;
55 struct gomp_task *task;
56 struct gomp_thread_pool *thread_pool;
57 unsigned int place;
58 bool nested;
62 /* This function is a pthread_create entry point. This contains the idle
63 loop in which a thread waits to be called up to become part of a team. */
65 static void *
66 gomp_thread_start (void *xdata)
68 struct gomp_thread_start_data *data = xdata;
69 struct gomp_thread *thr;
70 struct gomp_thread_pool *pool;
71 void (*local_fn) (void *);
72 void *local_data;
74 #if defined HAVE_TLS || defined USE_EMUTLS
75 thr = &gomp_tls_data;
76 #else
77 struct gomp_thread local_thr;
78 thr = &local_thr;
79 pthread_setspecific (gomp_tls_key, thr);
80 #endif
81 gomp_sem_init (&thr->release, 0);
83 /* Extract what we need from data. */
84 local_fn = data->fn;
85 local_data = data->fn_data;
86 thr->thread_pool = data->thread_pool;
87 thr->ts = data->ts;
88 thr->task = data->task;
89 thr->place = data->place;
91 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
93 /* Make thread pool local. */
94 pool = thr->thread_pool;
96 if (data->nested)
98 struct gomp_team *team = thr->ts.team;
99 struct gomp_task *task = thr->task;
101 gomp_barrier_wait (&team->barrier);
103 local_fn (local_data);
104 gomp_team_barrier_wait_final (&team->barrier);
105 gomp_finish_task (task);
106 gomp_barrier_wait_last (&team->barrier);
108 else
110 pool->threads[thr->ts.team_id] = thr;
112 gomp_barrier_wait (&pool->threads_dock);
115 struct gomp_team *team = thr->ts.team;
116 struct gomp_task *task = thr->task;
118 local_fn (local_data);
119 gomp_team_barrier_wait_final (&team->barrier);
120 gomp_finish_task (task);
122 gomp_barrier_wait (&pool->threads_dock);
124 local_fn = thr->fn;
125 local_data = thr->data;
126 thr->fn = NULL;
128 while (local_fn);
131 gomp_sem_destroy (&thr->release);
132 thr->thread_pool = NULL;
133 thr->task = NULL;
134 return NULL;
138 /* Create a new team data structure. */
140 struct gomp_team *
141 gomp_new_team (unsigned nthreads)
143 struct gomp_team *team;
144 size_t size;
145 int i;
147 size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
148 + sizeof (team->implicit_task[0]));
149 team = gomp_malloc (size);
151 team->work_share_chunk = 8;
152 #ifdef HAVE_SYNC_BUILTINS
153 team->single_count = 0;
154 #else
155 gomp_mutex_init (&team->work_share_list_free_lock);
156 #endif
157 team->work_shares_to_free = &team->work_shares[0];
158 gomp_init_work_share (&team->work_shares[0], false, nthreads);
159 team->work_shares[0].next_alloc = NULL;
160 team->work_share_list_free = NULL;
161 team->work_share_list_alloc = &team->work_shares[1];
162 for (i = 1; i < 7; i++)
163 team->work_shares[i].next_free = &team->work_shares[i + 1];
164 team->work_shares[i].next_free = NULL;
166 team->nthreads = nthreads;
167 gomp_barrier_init (&team->barrier, nthreads);
169 gomp_sem_init (&team->master_release, 0);
170 team->ordered_release = (void *) &team->implicit_task[nthreads];
171 team->ordered_release[0] = &team->master_release;
173 gomp_mutex_init (&team->task_lock);
174 team->task_queue = NULL;
175 team->task_count = 0;
176 team->task_queued_count = 0;
177 team->task_running_count = 0;
178 team->work_share_cancelled = 0;
179 team->team_cancelled = 0;
181 return team;
185 /* Free a team data structure. */
187 static void
188 free_team (struct gomp_team *team)
190 gomp_barrier_destroy (&team->barrier);
191 gomp_mutex_destroy (&team->task_lock);
192 free (team);
195 /* Allocate and initialize a thread pool. */
197 static struct gomp_thread_pool *gomp_new_thread_pool (void)
199 struct gomp_thread_pool *pool
200 = gomp_malloc (sizeof(struct gomp_thread_pool));
201 pool->threads = NULL;
202 pool->threads_size = 0;
203 pool->threads_used = 0;
204 pool->last_team = NULL;
205 return pool;
208 static void
209 gomp_free_pool_helper (void *thread_pool)
211 struct gomp_thread *thr = gomp_thread ();
212 struct gomp_thread_pool *pool
213 = (struct gomp_thread_pool *) thread_pool;
214 gomp_barrier_wait_last (&pool->threads_dock);
215 gomp_sem_destroy (&thr->release);
216 thr->thread_pool = NULL;
217 thr->task = NULL;
218 pthread_exit (NULL);
221 /* Free a thread pool and release its threads. */
223 void
224 gomp_free_thread (void *arg __attribute__((unused)))
226 struct gomp_thread *thr = gomp_thread ();
227 struct gomp_thread_pool *pool = thr->thread_pool;
228 if (pool)
230 if (pool->threads_used > 0)
232 int i;
233 for (i = 1; i < pool->threads_used; i++)
235 struct gomp_thread *nthr = pool->threads[i];
236 nthr->fn = gomp_free_pool_helper;
237 nthr->data = pool;
239 /* This barrier undocks threads docked on pool->threads_dock. */
240 gomp_barrier_wait (&pool->threads_dock);
241 /* And this waits till all threads have called gomp_barrier_wait_last
242 in gomp_free_pool_helper. */
243 gomp_barrier_wait (&pool->threads_dock);
244 /* Now it is safe to destroy the barrier and free the pool. */
245 gomp_barrier_destroy (&pool->threads_dock);
247 #ifdef HAVE_SYNC_BUILTINS
248 __sync_fetch_and_add (&gomp_managed_threads,
249 1L - pool->threads_used);
250 #else
251 gomp_mutex_lock (&gomp_managed_threads_lock);
252 gomp_managed_threads -= pool->threads_used - 1L;
253 gomp_mutex_unlock (&gomp_managed_threads_lock);
254 #endif
256 free (pool->threads);
257 if (pool->last_team)
258 free_team (pool->last_team);
259 free (pool);
260 thr->thread_pool = NULL;
262 if (thr->task != NULL)
264 struct gomp_task *task = thr->task;
265 gomp_end_task ();
266 free (task);
270 /* Launch a team. */
272 void
273 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
274 unsigned flags, struct gomp_team *team)
276 struct gomp_thread_start_data *start_data;
277 struct gomp_thread *thr, *nthr;
278 struct gomp_task *task;
279 struct gomp_task_icv *icv;
280 bool nested;
281 struct gomp_thread_pool *pool;
282 unsigned i, n, old_threads_used = 0;
283 pthread_attr_t thread_attr, *attr;
284 unsigned long nthreads_var;
285 char bind, bind_var;
286 unsigned int s = 0, rest = 0, p = 0, k = 0;
287 unsigned int affinity_count = 0;
288 struct gomp_thread **affinity_thr = NULL;
290 thr = gomp_thread ();
291 nested = thr->ts.team != NULL;
292 if (__builtin_expect (thr->thread_pool == NULL, 0))
294 thr->thread_pool = gomp_new_thread_pool ();
295 thr->thread_pool->threads_busy = nthreads;
296 pthread_setspecific (gomp_thread_destructor, thr);
298 pool = thr->thread_pool;
299 task = thr->task;
300 icv = task ? &task->icv : &gomp_global_icv;
301 if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
302 gomp_init_affinity ();
304 /* Always save the previous state, even if this isn't a nested team.
305 In particular, we should save any work share state from an outer
306 orphaned work share construct. */
307 team->prev_ts = thr->ts;
309 thr->ts.team = team;
310 thr->ts.team_id = 0;
311 ++thr->ts.level;
312 if (nthreads > 1)
313 ++thr->ts.active_level;
314 thr->ts.work_share = &team->work_shares[0];
315 thr->ts.last_work_share = NULL;
316 #ifdef HAVE_SYNC_BUILTINS
317 thr->ts.single_count = 0;
318 #endif
319 thr->ts.static_trip = 0;
320 thr->task = &team->implicit_task[0];
321 nthreads_var = icv->nthreads_var;
322 if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
323 && thr->ts.level < gomp_nthreads_var_list_len)
324 nthreads_var = gomp_nthreads_var_list[thr->ts.level];
325 bind_var = icv->bind_var;
326 if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
327 bind_var = flags & 7;
328 bind = bind_var;
329 if (__builtin_expect (gomp_bind_var_list != NULL, 0)
330 && thr->ts.level < gomp_bind_var_list_len)
331 bind_var = gomp_bind_var_list[thr->ts.level];
332 gomp_init_task (thr->task, task, icv);
333 team->implicit_task[0].icv.nthreads_var = nthreads_var;
334 team->implicit_task[0].icv.bind_var = bind_var;
336 if (nthreads == 1)
337 return;
339 i = 1;
341 if (__builtin_expect (gomp_places_list != NULL, 0))
343 /* Depending on chosen proc_bind model, set subpartition
344 for the master thread and initialize helper variables
345 P and optionally S, K and/or REST used by later place
346 computation for each additional thread. */
347 p = thr->place - 1;
348 switch (bind)
350 case omp_proc_bind_true:
351 case omp_proc_bind_close:
352 if (nthreads > thr->ts.place_partition_len)
354 /* T > P. S threads will be placed in each place,
355 and the final REM threads placed one by one
356 into the already occupied places. */
357 s = nthreads / thr->ts.place_partition_len;
358 rest = nthreads % thr->ts.place_partition_len;
360 else
361 s = 1;
362 k = 1;
363 break;
364 case omp_proc_bind_master:
365 /* Each thread will be bound to master's place. */
366 break;
367 case omp_proc_bind_spread:
368 if (nthreads <= thr->ts.place_partition_len)
370 /* T <= P. Each subpartition will have in between s
371 and s+1 places (subpartitions starting at or
372 after rest will have s places, earlier s+1 places),
373 each thread will be bound to the first place in
374 its subpartition (except for the master thread
375 that can be bound to another place in its
376 subpartition). */
377 s = thr->ts.place_partition_len / nthreads;
378 rest = thr->ts.place_partition_len % nthreads;
379 rest = (s + 1) * rest + thr->ts.place_partition_off;
380 if (p < rest)
382 p -= (p - thr->ts.place_partition_off) % (s + 1);
383 thr->ts.place_partition_len = s + 1;
385 else
387 p -= (p - rest) % s;
388 thr->ts.place_partition_len = s;
390 thr->ts.place_partition_off = p;
392 else
394 /* T > P. Each subpartition will have just a single
395 place and we'll place between s and s+1
396 threads into each subpartition. */
397 s = nthreads / thr->ts.place_partition_len;
398 rest = nthreads % thr->ts.place_partition_len;
399 thr->ts.place_partition_off = p;
400 thr->ts.place_partition_len = 1;
401 k = 1;
403 break;
406 else
407 bind = omp_proc_bind_false;
409 /* We only allow the reuse of idle threads for non-nested PARALLEL
410 regions. This appears to be implied by the semantics of
411 threadprivate variables, but perhaps that's reading too much into
412 things. Certainly it does prevent any locking problems, since
413 only the initial program thread will modify gomp_threads. */
414 if (!nested)
416 old_threads_used = pool->threads_used;
418 if (nthreads <= old_threads_used)
419 n = nthreads;
420 else if (old_threads_used == 0)
422 n = 0;
423 gomp_barrier_init (&pool->threads_dock, nthreads);
425 else
427 n = old_threads_used;
429 /* Increase the barrier threshold to make sure all new
430 threads arrive before the team is released. */
431 gomp_barrier_reinit (&pool->threads_dock, nthreads);
434 /* Not true yet, but soon will be. We're going to release all
435 threads from the dock, and those that aren't part of the
436 team will exit. */
437 pool->threads_used = nthreads;
439 /* If necessary, expand the size of the gomp_threads array. It is
440 expected that changes in the number of threads are rare, thus we
441 make no effort to expand gomp_threads_size geometrically. */
442 if (nthreads >= pool->threads_size)
444 pool->threads_size = nthreads + 1;
445 pool->threads
446 = gomp_realloc (pool->threads,
447 pool->threads_size
448 * sizeof (struct gomp_thread_data *));
451 /* Release existing idle threads. */
452 for (; i < n; ++i)
454 unsigned int place_partition_off = thr->ts.place_partition_off;
455 unsigned int place_partition_len = thr->ts.place_partition_len;
456 unsigned int place = 0;
457 if (__builtin_expect (gomp_places_list != NULL, 0))
459 switch (bind)
461 case omp_proc_bind_true:
462 case omp_proc_bind_close:
463 if (k == s)
465 ++p;
466 if (p == (team->prev_ts.place_partition_off
467 + team->prev_ts.place_partition_len))
468 p = team->prev_ts.place_partition_off;
469 k = 1;
470 if (i == nthreads - rest)
471 s = 1;
473 else
474 ++k;
475 break;
476 case omp_proc_bind_master:
477 break;
478 case omp_proc_bind_spread:
479 if (k == 0)
481 /* T <= P. */
482 if (p < rest)
483 p += s + 1;
484 else
485 p += s;
486 if (p == (team->prev_ts.place_partition_off
487 + team->prev_ts.place_partition_len))
488 p = team->prev_ts.place_partition_off;
489 place_partition_off = p;
490 if (p < rest)
491 place_partition_len = s + 1;
492 else
493 place_partition_len = s;
495 else
497 /* T > P. */
498 if (k == s)
500 ++p;
501 if (p == (team->prev_ts.place_partition_off
502 + team->prev_ts.place_partition_len))
503 p = team->prev_ts.place_partition_off;
504 k = 1;
505 if (i == nthreads - rest)
506 s = 1;
508 else
509 ++k;
510 place_partition_off = p;
511 place_partition_len = 1;
513 break;
515 if (affinity_thr != NULL
516 || (bind != omp_proc_bind_true
517 && pool->threads[i]->place != p + 1)
518 || pool->threads[i]->place <= place_partition_off
519 || pool->threads[i]->place > (place_partition_off
520 + place_partition_len))
522 unsigned int l;
523 if (affinity_thr == NULL)
525 unsigned int j;
527 if (team->prev_ts.place_partition_len > 64)
528 affinity_thr
529 = gomp_malloc (team->prev_ts.place_partition_len
530 * sizeof (struct gomp_thread *));
531 else
532 affinity_thr
533 = gomp_alloca (team->prev_ts.place_partition_len
534 * sizeof (struct gomp_thread *));
535 memset (affinity_thr, '\0',
536 team->prev_ts.place_partition_len
537 * sizeof (struct gomp_thread *));
538 for (j = i; j < old_threads_used; j++)
540 if (pool->threads[j]->place
541 > team->prev_ts.place_partition_off
542 && (pool->threads[j]->place
543 <= (team->prev_ts.place_partition_off
544 + team->prev_ts.place_partition_len)))
546 l = pool->threads[j]->place - 1
547 - team->prev_ts.place_partition_off;
548 pool->threads[j]->data = affinity_thr[l];
549 affinity_thr[l] = pool->threads[j];
551 pool->threads[j] = NULL;
553 if (nthreads > old_threads_used)
554 memset (&pool->threads[old_threads_used],
555 '\0', ((nthreads - old_threads_used)
556 * sizeof (struct gomp_thread *)));
557 n = nthreads;
558 affinity_count = old_threads_used - i;
560 if (affinity_count == 0)
561 break;
562 l = p;
563 if (affinity_thr[l - team->prev_ts.place_partition_off]
564 == NULL)
566 if (bind != omp_proc_bind_true)
567 continue;
568 for (l = place_partition_off;
569 l < place_partition_off + place_partition_len;
570 l++)
571 if (affinity_thr[l - team->prev_ts.place_partition_off]
572 != NULL)
573 break;
574 if (l == place_partition_off + place_partition_len)
575 continue;
577 nthr = affinity_thr[l - team->prev_ts.place_partition_off];
578 affinity_thr[l - team->prev_ts.place_partition_off]
579 = (struct gomp_thread *) nthr->data;
580 affinity_count--;
581 pool->threads[i] = nthr;
583 else
584 nthr = pool->threads[i];
585 place = p + 1;
587 else
588 nthr = pool->threads[i];
589 nthr->ts.team = team;
590 nthr->ts.work_share = &team->work_shares[0];
591 nthr->ts.last_work_share = NULL;
592 nthr->ts.team_id = i;
593 nthr->ts.level = team->prev_ts.level + 1;
594 nthr->ts.active_level = thr->ts.active_level;
595 nthr->ts.place_partition_off = place_partition_off;
596 nthr->ts.place_partition_len = place_partition_len;
597 #ifdef HAVE_SYNC_BUILTINS
598 nthr->ts.single_count = 0;
599 #endif
600 nthr->ts.static_trip = 0;
601 nthr->task = &team->implicit_task[i];
602 nthr->place = place;
603 gomp_init_task (nthr->task, task, icv);
604 team->implicit_task[i].icv.nthreads_var = nthreads_var;
605 team->implicit_task[i].icv.bind_var = bind_var;
606 nthr->fn = fn;
607 nthr->data = data;
608 team->ordered_release[i] = &nthr->release;
611 if (__builtin_expect (affinity_thr != NULL, 0))
613 /* If AFFINITY_THR is non-NULL just because we had to
614 permute some threads in the pool, but we've managed
615 to find exactly as many old threads as we'd find
616 without affinity, we don't need to handle this
617 specially anymore. */
618 if (nthreads <= old_threads_used
619 ? (affinity_count == old_threads_used - nthreads)
620 : (i == old_threads_used))
622 if (team->prev_ts.place_partition_len > 64)
623 free (affinity_thr);
624 affinity_thr = NULL;
625 affinity_count = 0;
627 else
629 i = 1;
630 /* We are going to compute the places/subpartitions
631 again from the beginning. So, we need to reinitialize
632 vars modified by the switch (bind) above inside
633 of the loop, to the state they had after the initial
634 switch (bind). */
635 switch (bind)
637 case omp_proc_bind_true:
638 case omp_proc_bind_close:
639 if (nthreads > thr->ts.place_partition_len)
640 /* T > P. S has been changed, so needs
641 to be recomputed. */
642 s = nthreads / thr->ts.place_partition_len;
643 k = 1;
644 p = thr->place - 1;
645 break;
646 case omp_proc_bind_master:
647 /* No vars have been changed. */
648 break;
649 case omp_proc_bind_spread:
650 p = thr->ts.place_partition_off;
651 if (k != 0)
653 /* T > P. */
654 s = nthreads / team->prev_ts.place_partition_len;
655 k = 1;
657 break;
660 /* Increase the barrier threshold to make sure all new
661 threads and all the threads we're going to let die
662 arrive before the team is released. */
663 if (affinity_count)
664 gomp_barrier_reinit (&pool->threads_dock,
665 nthreads + affinity_count);
669 if (i == nthreads)
670 goto do_release;
674 if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
676 long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
678 if (old_threads_used == 0)
679 --diff;
681 #ifdef HAVE_SYNC_BUILTINS
682 __sync_fetch_and_add (&gomp_managed_threads, diff);
683 #else
684 gomp_mutex_lock (&gomp_managed_threads_lock);
685 gomp_managed_threads += diff;
686 gomp_mutex_unlock (&gomp_managed_threads_lock);
687 #endif
690 attr = &gomp_thread_attr;
691 if (__builtin_expect (gomp_places_list != NULL, 0))
693 size_t stacksize;
694 pthread_attr_init (&thread_attr);
695 pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
696 if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
697 pthread_attr_setstacksize (&thread_attr, stacksize);
698 attr = &thread_attr;
701 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
702 * (nthreads-i));
704 /* Launch new threads. */
705 for (; i < nthreads; ++i)
707 pthread_t pt;
708 int err;
710 start_data->ts.place_partition_off = thr->ts.place_partition_off;
711 start_data->ts.place_partition_len = thr->ts.place_partition_len;
712 start_data->place = 0;
713 if (__builtin_expect (gomp_places_list != NULL, 0))
715 switch (bind)
717 case omp_proc_bind_true:
718 case omp_proc_bind_close:
719 if (k == s)
721 ++p;
722 if (p == (team->prev_ts.place_partition_off
723 + team->prev_ts.place_partition_len))
724 p = team->prev_ts.place_partition_off;
725 k = 1;
726 if (i == nthreads - rest)
727 s = 1;
729 else
730 ++k;
731 break;
732 case omp_proc_bind_master:
733 break;
734 case omp_proc_bind_spread:
735 if (k == 0)
737 /* T <= P. */
738 if (p < rest)
739 p += s + 1;
740 else
741 p += s;
742 if (p == (team->prev_ts.place_partition_off
743 + team->prev_ts.place_partition_len))
744 p = team->prev_ts.place_partition_off;
745 start_data->ts.place_partition_off = p;
746 if (p < rest)
747 start_data->ts.place_partition_len = s + 1;
748 else
749 start_data->ts.place_partition_len = s;
751 else
753 /* T > P. */
754 if (k == s)
756 ++p;
757 if (p == (team->prev_ts.place_partition_off
758 + team->prev_ts.place_partition_len))
759 p = team->prev_ts.place_partition_off;
760 k = 1;
761 if (i == nthreads - rest)
762 s = 1;
764 else
765 ++k;
766 start_data->ts.place_partition_off = p;
767 start_data->ts.place_partition_len = 1;
769 break;
771 start_data->place = p + 1;
772 if (affinity_thr != NULL && pool->threads[i] != NULL)
773 continue;
774 gomp_init_thread_affinity (attr, p);
777 start_data->fn = fn;
778 start_data->fn_data = data;
779 start_data->ts.team = team;
780 start_data->ts.work_share = &team->work_shares[0];
781 start_data->ts.last_work_share = NULL;
782 start_data->ts.team_id = i;
783 start_data->ts.level = team->prev_ts.level + 1;
784 start_data->ts.active_level = thr->ts.active_level;
785 #ifdef HAVE_SYNC_BUILTINS
786 start_data->ts.single_count = 0;
787 #endif
788 start_data->ts.static_trip = 0;
789 start_data->task = &team->implicit_task[i];
790 gomp_init_task (start_data->task, task, icv);
791 team->implicit_task[i].icv.nthreads_var = nthreads_var;
792 team->implicit_task[i].icv.bind_var = bind_var;
793 start_data->thread_pool = pool;
794 start_data->nested = nested;
796 err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
797 if (err != 0)
798 gomp_fatal ("Thread creation failed: %s", strerror (err));
801 if (__builtin_expect (gomp_places_list != NULL, 0))
802 pthread_attr_destroy (&thread_attr);
804 do_release:
805 gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
807 /* Decrease the barrier threshold to match the number of threads
808 that should arrive back at the end of this team. The extra
809 threads should be exiting. Note that we arrange for this test
810 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
811 the barrier as well as gomp_managed_threads was temporarily
812 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
813 AFFINITY_COUNT if non-zero will be always at least
814 OLD_THREADS_COUNT - NTHREADS. */
815 if (__builtin_expect (nthreads < old_threads_used, 0)
816 || __builtin_expect (affinity_count, 0))
818 long diff = (long) nthreads - (long) old_threads_used;
820 if (affinity_count)
821 diff = -affinity_count;
823 gomp_barrier_reinit (&pool->threads_dock, nthreads);
825 #ifdef HAVE_SYNC_BUILTINS
826 __sync_fetch_and_add (&gomp_managed_threads, diff);
827 #else
828 gomp_mutex_lock (&gomp_managed_threads_lock);
829 gomp_managed_threads += diff;
830 gomp_mutex_unlock (&gomp_managed_threads_lock);
831 #endif
833 if (__builtin_expect (affinity_thr != NULL, 0)
834 && team->prev_ts.place_partition_len > 64)
835 free (affinity_thr);
839 /* Terminate the current team. This is only to be called by the master
840 thread. We assume that we must wait for the other threads. */
842 void
843 gomp_team_end (void)
845 struct gomp_thread *thr = gomp_thread ();
846 struct gomp_team *team = thr->ts.team;
848 /* This barrier handles all pending explicit threads.
849 As #pragma omp cancel parallel might get awaited count in
850 team->barrier in a inconsistent state, we need to use a different
851 counter here. */
852 gomp_team_barrier_wait_final (&team->barrier);
853 if (__builtin_expect (team->team_cancelled, 0))
855 struct gomp_work_share *ws = team->work_shares_to_free;
858 struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
859 if (next_ws == NULL)
860 gomp_ptrlock_set (&ws->next_ws, ws);
861 gomp_fini_work_share (ws);
862 ws = next_ws;
864 while (ws != NULL);
866 else
867 gomp_fini_work_share (thr->ts.work_share);
869 gomp_end_task ();
870 thr->ts = team->prev_ts;
872 if (__builtin_expect (thr->ts.team != NULL, 0))
874 #ifdef HAVE_SYNC_BUILTINS
875 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
876 #else
877 gomp_mutex_lock (&gomp_managed_threads_lock);
878 gomp_managed_threads -= team->nthreads - 1L;
879 gomp_mutex_unlock (&gomp_managed_threads_lock);
880 #endif
881 /* This barrier has gomp_barrier_wait_last counterparts
882 and ensures the team can be safely destroyed. */
883 gomp_barrier_wait (&team->barrier);
886 if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
888 struct gomp_work_share *ws = team->work_shares[0].next_alloc;
891 struct gomp_work_share *next_ws = ws->next_alloc;
892 free (ws);
893 ws = next_ws;
895 while (ws != NULL);
897 gomp_sem_destroy (&team->master_release);
898 #ifndef HAVE_SYNC_BUILTINS
899 gomp_mutex_destroy (&team->work_share_list_free_lock);
900 #endif
902 if (__builtin_expect (thr->ts.team != NULL, 0)
903 || __builtin_expect (team->nthreads == 1, 0))
904 free_team (team);
905 else
907 struct gomp_thread_pool *pool = thr->thread_pool;
908 if (pool->last_team)
909 free_team (pool->last_team);
910 pool->last_team = team;
915 /* Constructors for this file. */
917 static void __attribute__((constructor))
918 initialize_team (void)
920 #if !defined HAVE_TLS && !defined USE_EMUTLS
921 static struct gomp_thread initial_thread_tls_data;
923 pthread_key_create (&gomp_tls_key, NULL);
924 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
925 #endif
927 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
928 gomp_fatal ("could not create thread pool destructor.");
931 static void __attribute__((destructor))
932 team_destructor (void)
934 /* Without this dlclose on libgomp could lead to subsequent
935 crashes. */
936 pthread_key_delete (gomp_thread_destructor);
939 struct gomp_task_icv *
940 gomp_new_icv (void)
942 struct gomp_thread *thr = gomp_thread ();
943 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
944 gomp_init_task (task, NULL, &gomp_global_icv);
945 thr->task = task;
946 pthread_setspecific (gomp_thread_destructor, thr);
947 return &task->icv;