[official-gcc.git] / libgomp / team.c
blob e3e4c4d1ef2771c260ce7239e02422a873bdac8f
/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */
#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif
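
/* For reference, the hand-off between gomp_team_start and a worker docked
   in the idle loop above, in rough outline.  The master side does

     nthr->fn = fn;
     nthr->data = data;
     gomp_simple_barrier_wait (&pool->threads_dock);

   which undocks the workers; each worker then picks the work up with

     local_fn = thr->fn;
     local_data = thr->data;
     thr->fn = NULL;

   and re-enters the loop body.  A released worker whose fn was left NULL
   (it was not picked for the new, smaller team) instead falls out of the
   do/while loop, detaches itself and exits.  */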
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}
/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
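
/* For reference, a rough sketch of how the functions in this file are
   driven by the parallel-region entry point (approximately what
   GOMP_parallel in parallel.c does; exact details vary between libgomp
   versions):

     unsigned num_threads = gomp_resolve_num_threads (...);
     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, flags, team, NULL);
     fn (data);               // the master runs its share of the region
     GOMP_parallel_end ();    // eventually reaches gomp_team_end ()

   gomp_new_team either recycles pool->last_team via get_last_team above or
   allocates a fresh team with NTHREADS implicit_task entries and
   ordered_release slots laid out after the struct itself.  */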
/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}
/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
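
/* gomp_free_thread is registered below (in initialize_team) as the
   destructor of gomp_thread_destructor, so it runs when a thread owning a
   pool exits: it points every docked worker at gomp_free_pool_helper,
   releases them through threads_dock, waits for the last of them on a
   second dock barrier, and only then destroys the barrier and frees the
   pool.  */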
/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;
  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REM threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;
  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }
      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }
      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;
    }
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);
 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif
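
/* In the non-nested case gomp_team_start proceeds in three phases:
   (1) reuse up to N idle threads already docked in the pool, handing each
   its fn/data, team slot and (when affinity is active) place;
   (2) pthread_create any additional threads, passing a
   gomp_thread_start_data that describes their team state;
   (3) release everything at do_release through threads_dock (or
   team->barrier when nested), then shrink the dock threshold back to
   NTHREADS if extra threads were woken only so that they could exit.  */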
/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
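
/* Rather than freeing the team immediately, the non-nested path above parks
   it in pool->last_team; the next gomp_new_team call with the same thread
   count takes it back via get_last_team, skipping the allocation, barrier
   and lock initialization that a brand-new team would need.  */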
#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
/* Similar to gomp_free_pool_helper, but doesn't detach itself;
   gomp_pause_host will pthread_join these threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}
/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif
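
/* Unlike gomp_free_thread, the workers here run gomp_pause_pool_helper,
   which does not detach, so they can be pthread_join'ed above before the
   pool memory is released.  Presumably this backs omp_pause_resource for
   the host device: after pausing, thr->thread_pool is NULL and the next
   parallel region starts over with a fresh pool.  */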
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}
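
/* gomp_new_icv gives a thread that libgomp did not create (and that
   therefore has no gomp_task yet) a private, writable copy of the global
   ICVs, and registers the thread with gomp_thread_destructor so that
   gomp_free_thread runs at thread exit.  It is typically reached through
   the gomp_icv helper in libgomp.h the first time such a thread writes an
   ICV, e.g. via omp_set_num_threads.  */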