compiler: load LHS subexpressions of op= assignment only once
[official-gcc.git] / libgomp / team.c
blobcb6875d70fa29b8b7a1dd0312bc1839949d47c51
1 /* Copyright (C) 2005-2022 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */
29 #include "libgomp.h"
30 #include "pool.h"
31 #include <stdlib.h>
32 #include <string.h>
34 #ifdef LIBGOMP_USE_PTHREADS
35 pthread_attr_t gomp_thread_attr;
37 /* This key is for the thread destructor. */
38 pthread_key_t gomp_thread_destructor;
41 /* This is the libgomp per-thread data structure. */
42 #if defined HAVE_TLS || defined USE_EMUTLS
43 __thread struct gomp_thread gomp_tls_data;
44 #else
45 pthread_key_t gomp_tls_key;
46 #endif
49 /* This structure is used to communicate across pthread_create. */
51 struct gomp_thread_start_data
53 void (*fn) (void *);
54 void *fn_data;
55 struct gomp_team_state ts;
56 struct gomp_task *task;
57 struct gomp_thread_pool *thread_pool;
58 unsigned int place;
59 unsigned int num_teams;
60 unsigned int team_num;
61 bool nested;
62 pthread_t handle;
66 /* This function is a pthread_create entry point. This contains the idle
67 loop in which a thread waits to be called up to become part of a team. */
69 static void *
70 gomp_thread_start (void *xdata)
72 struct gomp_thread_start_data *data = xdata;
73 struct gomp_thread *thr;
74 struct gomp_thread_pool *pool;
75 void (*local_fn) (void *);
76 void *local_data;
78 #if defined HAVE_TLS || defined USE_EMUTLS
79 thr = &gomp_tls_data;
80 #else
81 struct gomp_thread local_thr;
82 thr = &local_thr;
83 #endif
84 gomp_sem_init (&thr->release, 0);
86 /* Extract what we need from data. */
87 local_fn = data->fn;
88 local_data = data->fn_data;
89 thr->thread_pool = data->thread_pool;
90 thr->ts = data->ts;
91 thr->task = data->task;
92 thr->place = data->place;
93 thr->num_teams = data->num_teams;
94 thr->team_num = data->team_num;
95 #ifdef GOMP_NEEDS_THREAD_HANDLE
96 thr->handle = data->handle;
97 #endif
98 #if !(defined HAVE_TLS || defined USE_EMUTLS)
99 pthread_setspecific (gomp_tls_key, thr);
100 #endif
102 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
104 /* Make thread pool local. */
105 pool = thr->thread_pool;
107 if (data->nested)
109 struct gomp_team *team = thr->ts.team;
110 struct gomp_task *task = thr->task;
112 gomp_barrier_wait (&team->barrier);
114 local_fn (local_data);
115 gomp_team_barrier_wait_final (&team->barrier);
116 gomp_finish_task (task);
117 gomp_barrier_wait_last (&team->barrier);
119 else
121 pool->threads[thr->ts.team_id] = thr;
123 gomp_simple_barrier_wait (&pool->threads_dock);
126 struct gomp_team *team = thr->ts.team;
127 struct gomp_task *task = thr->task;
129 local_fn (local_data);
130 gomp_team_barrier_wait_final (&team->barrier);
131 gomp_finish_task (task);
133 gomp_simple_barrier_wait (&pool->threads_dock);
135 local_fn = thr->fn;
136 local_data = thr->data;
137 thr->fn = NULL;
139 while (local_fn);
142 gomp_sem_destroy (&thr->release);
143 pthread_detach (pthread_self ());
144 thr->thread_pool = NULL;
145 thr->task = NULL;
146 return NULL;
148 #endif
150 static inline struct gomp_team *
151 get_last_team (unsigned nthreads)
153 struct gomp_thread *thr = gomp_thread ();
154 if (thr->ts.team == NULL)
156 struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
157 struct gomp_team *last_team = pool->last_team;
158 if (last_team != NULL && last_team->nthreads == nthreads)
160 pool->last_team = NULL;
161 return last_team;
164 return NULL;
167 /* Create a new team data structure. */
169 struct gomp_team *
170 gomp_new_team (unsigned nthreads)
172 struct gomp_team *team;
173 int i;
175 team = get_last_team (nthreads);
176 if (team == NULL)
178 size_t extra = sizeof (team->ordered_release[0])
179 + sizeof (team->implicit_task[0]);
180 #ifdef GOMP_USE_ALIGNED_WORK_SHARES
181 team = gomp_aligned_alloc (__alignof (struct gomp_team),
182 sizeof (*team) + nthreads * extra);
183 #else
184 team = team_malloc (sizeof (*team) + nthreads * extra);
185 #endif
187 #ifndef HAVE_SYNC_BUILTINS
188 gomp_mutex_init (&team->work_share_list_free_lock);
189 #endif
190 gomp_barrier_init (&team->barrier, nthreads);
191 gomp_mutex_init (&team->task_lock);
193 team->nthreads = nthreads;
196 team->work_share_chunk = 8;
197 #ifdef HAVE_SYNC_BUILTINS
198 team->single_count = 0;
199 #endif
200 team->work_shares_to_free = &team->work_shares[0];
201 gomp_init_work_share (&team->work_shares[0], 0, nthreads);
202 team->work_shares[0].next_alloc = NULL;
203 team->work_share_list_free = NULL;
204 team->work_share_list_alloc = &team->work_shares[1];
205 for (i = 1; i < 7; i++)
206 team->work_shares[i].next_free = &team->work_shares[i + 1];
207 team->work_shares[i].next_free = NULL;
209 gomp_sem_init (&team->master_release, 0);
210 team->ordered_release = (void *) &team->implicit_task[nthreads];
211 team->ordered_release[0] = &team->master_release;
213 priority_queue_init (&team->task_queue);
214 team->task_count = 0;
215 team->task_queued_count = 0;
216 team->task_running_count = 0;
217 team->work_share_cancelled = 0;
218 team->team_cancelled = 0;
220 team->task_detach_count = 0;
222 return team;
226 /* Free a team data structure. */
228 static void
229 free_team (struct gomp_team *team)
231 #ifndef HAVE_SYNC_BUILTINS
232 gomp_mutex_destroy (&team->work_share_list_free_lock);
233 #endif
234 gomp_barrier_destroy (&team->barrier);
235 gomp_mutex_destroy (&team->task_lock);
236 priority_queue_free (&team->task_queue);
237 team_free (team);
240 static void
241 gomp_free_pool_helper (void *thread_pool)
243 struct gomp_thread *thr = gomp_thread ();
244 struct gomp_thread_pool *pool
245 = (struct gomp_thread_pool *) thread_pool;
246 gomp_simple_barrier_wait_last (&pool->threads_dock);
247 gomp_sem_destroy (&thr->release);
248 thr->thread_pool = NULL;
249 thr->task = NULL;
250 #ifdef LIBGOMP_USE_PTHREADS
251 pthread_detach (pthread_self ());
252 pthread_exit (NULL);
253 #elif defined(__nvptx__)
254 asm ("exit;");
255 #elif defined(__AMDGCN__)
256 asm ("s_dcache_wb\n\t"
257 "s_endpgm");
258 #else
259 #error gomp_free_pool_helper must terminate the thread
260 #endif
263 /* Free a thread pool and release its threads. */
265 void
266 gomp_free_thread (void *arg __attribute__((unused)))
268 struct gomp_thread *thr = gomp_thread ();
269 struct gomp_thread_pool *pool = thr->thread_pool;
270 if (pool)
272 if (pool->threads_used > 0)
274 int i;
275 for (i = 1; i < pool->threads_used; i++)
277 struct gomp_thread *nthr = pool->threads[i];
278 nthr->fn = gomp_free_pool_helper;
279 nthr->data = pool;
281 /* This barrier undocks threads docked on pool->threads_dock. */
282 gomp_simple_barrier_wait (&pool->threads_dock);
283 /* And this waits till all threads have called gomp_barrier_wait_last
284 in gomp_free_pool_helper. */
285 gomp_simple_barrier_wait (&pool->threads_dock);
286 /* Now it is safe to destroy the barrier and free the pool. */
287 gomp_simple_barrier_destroy (&pool->threads_dock);
289 #ifdef HAVE_SYNC_BUILTINS
290 __sync_fetch_and_add (&gomp_managed_threads,
291 1L - pool->threads_used);
292 #else
293 gomp_mutex_lock (&gomp_managed_threads_lock);
294 gomp_managed_threads -= pool->threads_used - 1L;
295 gomp_mutex_unlock (&gomp_managed_threads_lock);
296 #endif
298 if (pool->last_team)
299 free_team (pool->last_team);
300 #ifndef __nvptx__
301 team_free (pool->threads);
302 team_free (pool);
303 #endif
304 thr->thread_pool = NULL;
306 if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
307 gomp_team_end ();
308 if (thr->task != NULL)
310 struct gomp_task *task = thr->task;
311 gomp_end_task ();
312 free (task);
316 /* Launch a team. */
/* gomp_team_start makes the calling thread member 0 of TEAM and arranges
   for NTHREADS - 1 further threads to run FN (DATA).

   FN, DATA   - function and argument each additional team member runs.
   NTHREADS   - total team size, including the caller.
   FLAGS      - the low three bits (FLAGS & 7) give a proc_bind policy that
                replaces the ICV's bind_var unless either of the two is
                omp_proc_bind_false.
   TEAM       - team structure to start; the caller's previous team state
                is saved in TEAM->prev_ts.
   TASKGROUP  - stored in the taskgroup field of every implicit task.

   When NTHREADS is 1 the function returns once the caller's implicit task
   is set up.  Otherwise, for a non-nested team, threads already in the
   pool are reused by storing FN/DATA in their gomp_thread and releasing
   them through pool->threads_dock; any members still missing are created
   with pthread_create running gomp_thread_start.  A nested team always
   creates its members and synchronizes on TEAM->barrier instead.  A
   pthread_create failure is reported through gomp_fatal.

   NOTE(review): the leading number on each line and the missing
   brace-only lines look like residue of a web source listing; the code
   tokens below are left exactly as found.  */
318 #ifdef LIBGOMP_USE_PTHREADS
319 void
320 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
321 unsigned flags, struct gomp_team *team,
322 struct gomp_taskgroup *taskgroup)
324 struct gomp_thread_start_data *start_data = NULL;
325 struct gomp_thread *thr, *nthr;
326 struct gomp_task *task;
327 struct gomp_task_icv *icv;
328 bool nested;
329 struct gomp_thread_pool *pool;
330 unsigned i, n, old_threads_used = 0;
331 pthread_attr_t thread_attr, *attr;
332 unsigned long nthreads_var;
333 char bind, bind_var;
334 unsigned int s = 0, rest = 0, p = 0, k = 0;
335 unsigned int affinity_count = 0;
336 struct gomp_thread **affinity_thr = NULL;
337 bool force_display = false;
339 thr = gomp_thread ();
340 nested = thr->ts.level;
341 pool = thr->thread_pool;
342 task = thr->task;
343 icv = task ? &task->icv : &gomp_global_icv;
344 if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
346 gomp_init_affinity ();
347 if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
348 gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
349 thr->place);
352 /* Always save the previous state, even if this isn't a nested team.
353 In particular, we should save any work share state from an outer
354 orphaned work share construct. */
355 team->prev_ts = thr->ts;
357 thr->ts.team = team;
358 thr->ts.team_id = 0;
359 ++thr->ts.level;
360 if (nthreads > 1)
361 ++thr->ts.active_level;
362 thr->ts.work_share = &team->work_shares[0];
363 thr->ts.last_work_share = NULL;
364 #ifdef HAVE_SYNC_BUILTINS
365 thr->ts.single_count = 0;
366 #endif
367 thr->ts.static_trip = 0;
368 thr->task = &team->implicit_task[0];
369 #ifdef GOMP_NEEDS_THREAD_HANDLE
370 thr->handle = pthread_self ();
371 #endif
372 nthreads_var = icv->nthreads_var;
373 if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
374 && thr->ts.level < gomp_nthreads_var_list_len)
375 nthreads_var = gomp_nthreads_var_list[thr->ts.level];
376 bind_var = icv->bind_var;
377 if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
378 bind_var = flags & 7;
379 bind = bind_var;
380 if (__builtin_expect (gomp_bind_var_list != NULL, 0)
381 && thr->ts.level < gomp_bind_var_list_len)
382 bind_var = gomp_bind_var_list[thr->ts.level];
383 gomp_init_task (thr->task, task, icv);
384 thr->task->taskgroup = taskgroup;
385 team->implicit_task[0].icv.nthreads_var = nthreads_var;
386 team->implicit_task[0].icv.bind_var = bind_var;
388 if (nthreads == 1)
389 return;
/* I indexes the next team member to set up; member 0 is the calling
   thread, which was handled above.  */
391 i = 1;
393 if (__builtin_expect (gomp_places_list != NULL, 0))
395 /* Depending on chosen proc_bind model, set subpartition
396 for the master thread and initialize helper variables
397 P and optionally S, K and/or REST used by later place
398 computation for each additional thread. */
399 p = thr->place - 1;
400 switch (bind)
402 case omp_proc_bind_true:
403 case omp_proc_bind_close:
404 if (nthreads > thr->ts.place_partition_len)
406 /* T > P. S threads will be placed in each place,
407 and the final REM threads placed one by one
408 into the already occupied places. */
409 s = nthreads / thr->ts.place_partition_len;
410 rest = nthreads % thr->ts.place_partition_len;
412 else
413 s = 1;
414 k = 1;
415 break;
416 case omp_proc_bind_master:
417 /* Each thread will be bound to master's place. */
418 break;
419 case omp_proc_bind_spread:
420 if (nthreads <= thr->ts.place_partition_len)
422 /* T <= P. Each subpartition will have in between s
423 and s+1 places (subpartitions starting at or
424 after rest will have s places, earlier s+1 places),
425 each thread will be bound to the first place in
426 its subpartition (except for the master thread
427 that can be bound to another place in its
428 subpartition). */
429 s = thr->ts.place_partition_len / nthreads;
430 rest = thr->ts.place_partition_len % nthreads;
431 rest = (s + 1) * rest + thr->ts.place_partition_off;
432 if (p < rest)
434 p -= (p - thr->ts.place_partition_off) % (s + 1);
435 thr->ts.place_partition_len = s + 1;
437 else
439 p -= (p - rest) % s;
440 thr->ts.place_partition_len = s;
442 thr->ts.place_partition_off = p;
444 else
446 /* T > P. Each subpartition will have just a single
447 place and we'll place between s and s+1
448 threads into each subpartition. */
449 s = nthreads / thr->ts.place_partition_len;
450 rest = nthreads % thr->ts.place_partition_len;
451 thr->ts.place_partition_off = p;
452 thr->ts.place_partition_len = 1;
453 k = 1;
455 break;
458 else
459 bind = omp_proc_bind_false;
461 /* We only allow the reuse of idle threads for non-nested PARALLEL
462 regions. This appears to be implied by the semantics of
463 threadprivate variables, but perhaps that's reading too much into
464 things. Certainly it does prevent any locking problems, since
465 only the initial program thread will modify gomp_threads. */
466 if (!nested)
468 old_threads_used = pool->threads_used;
470 if (nthreads <= old_threads_used)
471 n = nthreads;
472 else if (old_threads_used == 0)
474 n = 0;
475 gomp_simple_barrier_init (&pool->threads_dock, nthreads);
477 else
479 n = old_threads_used;
481 /* Increase the barrier threshold to make sure all new
482 threads arrive before the team is released. */
483 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
486 /* Not true yet, but soon will be. We're going to release all
487 threads from the dock, and those that aren't part of the
488 team will exit. */
489 pool->threads_used = nthreads;
491 /* If necessary, expand the size of the gomp_threads array. It is
492 expected that changes in the number of threads are rare, thus we
493 make no effort to expand gomp_threads_size geometrically. */
494 if (nthreads >= pool->threads_size)
496 pool->threads_size = nthreads + 1;
497 pool->threads
498 = gomp_realloc (pool->threads,
499 pool->threads_size
500 * sizeof (struct gomp_thread *));
501 /* Add current (master) thread to threads[]. */
502 pool->threads[0] = thr;
505 /* Release existing idle threads. */
506 for (; i < n; ++i)
508 unsigned int place_partition_off = thr->ts.place_partition_off;
509 unsigned int place_partition_len = thr->ts.place_partition_len;
510 unsigned int place = 0;
511 if (__builtin_expect (gomp_places_list != NULL, 0))
513 switch (bind)
515 case omp_proc_bind_true:
516 case omp_proc_bind_close:
517 if (k == s)
519 ++p;
520 if (p == (team->prev_ts.place_partition_off
521 + team->prev_ts.place_partition_len))
522 p = team->prev_ts.place_partition_off;
523 k = 1;
524 if (i == nthreads - rest)
525 s = 1;
527 else
528 ++k;
529 break;
530 case omp_proc_bind_master:
531 break;
532 case omp_proc_bind_spread:
533 if (k == 0)
535 /* T <= P. */
536 if (p < rest)
537 p += s + 1;
538 else
539 p += s;
540 if (p == (team->prev_ts.place_partition_off
541 + team->prev_ts.place_partition_len))
542 p = team->prev_ts.place_partition_off;
543 place_partition_off = p;
544 if (p < rest)
545 place_partition_len = s + 1;
546 else
547 place_partition_len = s;
549 else
551 /* T > P. */
552 if (k == s)
554 ++p;
555 if (p == (team->prev_ts.place_partition_off
556 + team->prev_ts.place_partition_len))
557 p = team->prev_ts.place_partition_off;
558 k = 1;
559 if (i == nthreads - rest)
560 s = 1;
562 else
563 ++k;
564 place_partition_off = p;
565 place_partition_len = 1;
567 break;
569 if (affinity_thr != NULL
570 || (bind != omp_proc_bind_true
571 && pool->threads[i]->place != p + 1)
572 || pool->threads[i]->place <= place_partition_off
573 || pool->threads[i]->place > (place_partition_off
574 + place_partition_len))
576 unsigned int l;
577 force_display = true;
578 if (affinity_thr == NULL)
580 unsigned int j;
582 if (team->prev_ts.place_partition_len > 64)
583 affinity_thr
584 = gomp_malloc (team->prev_ts.place_partition_len
585 * sizeof (struct gomp_thread *));
586 else
587 affinity_thr
588 = gomp_alloca (team->prev_ts.place_partition_len
589 * sizeof (struct gomp_thread *));
590 memset (affinity_thr, '\0',
591 team->prev_ts.place_partition_len
592 * sizeof (struct gomp_thread *));
593 for (j = i; j < old_threads_used; j++)
595 if (pool->threads[j]->place
596 > team->prev_ts.place_partition_off
597 && (pool->threads[j]->place
598 <= (team->prev_ts.place_partition_off
599 + team->prev_ts.place_partition_len)))
601 l = pool->threads[j]->place - 1
602 - team->prev_ts.place_partition_off;
603 pool->threads[j]->data = affinity_thr[l];
604 affinity_thr[l] = pool->threads[j];
606 pool->threads[j] = NULL;
608 if (nthreads > old_threads_used)
609 memset (&pool->threads[old_threads_used],
610 '\0', ((nthreads - old_threads_used)
611 * sizeof (struct gomp_thread *)));
612 n = nthreads;
613 affinity_count = old_threads_used - i;
615 if (affinity_count == 0)
616 break;
617 l = p;
618 if (affinity_thr[l - team->prev_ts.place_partition_off]
619 == NULL)
621 if (bind != omp_proc_bind_true)
622 continue;
623 for (l = place_partition_off;
624 l < place_partition_off + place_partition_len;
625 l++)
626 if (affinity_thr[l - team->prev_ts.place_partition_off]
627 != NULL)
628 break;
629 if (l == place_partition_off + place_partition_len)
630 continue;
632 nthr = affinity_thr[l - team->prev_ts.place_partition_off];
633 affinity_thr[l - team->prev_ts.place_partition_off]
634 = (struct gomp_thread *) nthr->data;
635 affinity_count--;
636 pool->threads[i] = nthr;
638 else
639 nthr = pool->threads[i];
640 place = p + 1;
642 else
643 nthr = pool->threads[i];
644 nthr->ts.team = team;
645 nthr->ts.work_share = &team->work_shares[0];
646 nthr->ts.last_work_share = NULL;
647 nthr->ts.team_id = i;
648 nthr->ts.level = team->prev_ts.level + 1;
649 nthr->ts.active_level = thr->ts.active_level;
650 nthr->ts.place_partition_off = place_partition_off;
651 nthr->ts.place_partition_len = place_partition_len;
652 nthr->ts.def_allocator = thr->ts.def_allocator;
653 #ifdef HAVE_SYNC_BUILTINS
654 nthr->ts.single_count = 0;
655 #endif
656 nthr->ts.static_trip = 0;
657 nthr->num_teams = thr->num_teams;
658 nthr->team_num = thr->team_num;
659 nthr->task = &team->implicit_task[i];
660 nthr->place = place;
661 gomp_init_task (nthr->task, task, icv);
662 team->implicit_task[i].icv.nthreads_var = nthreads_var;
663 team->implicit_task[i].icv.bind_var = bind_var;
664 nthr->task->taskgroup = taskgroup;
665 nthr->fn = fn;
666 nthr->data = data;
667 team->ordered_release[i] = &nthr->release;
670 if (__builtin_expect (affinity_thr != NULL, 0))
672 /* If AFFINITY_THR is non-NULL just because we had to
673 permute some threads in the pool, but we've managed
674 to find exactly as many old threads as we'd find
675 without affinity, we don't need to handle this
676 specially anymore. */
677 if (nthreads <= old_threads_used
678 ? (affinity_count == old_threads_used - nthreads)
679 : (i == old_threads_used))
681 if (team->prev_ts.place_partition_len > 64)
682 free (affinity_thr);
683 affinity_thr = NULL;
684 affinity_count = 0;
686 else
688 i = 1;
689 /* We are going to compute the places/subpartitions
690 again from the beginning. So, we need to reinitialize
691 vars modified by the switch (bind) above inside
692 of the loop, to the state they had after the initial
693 switch (bind). */
694 switch (bind)
696 case omp_proc_bind_true:
697 case omp_proc_bind_close:
698 if (nthreads > thr->ts.place_partition_len)
699 /* T > P. S has been changed, so needs
700 to be recomputed. */
701 s = nthreads / thr->ts.place_partition_len;
702 k = 1;
703 p = thr->place - 1;
704 break;
705 case omp_proc_bind_master:
706 /* No vars have been changed. */
707 break;
708 case omp_proc_bind_spread:
709 p = thr->ts.place_partition_off;
710 if (k != 0)
712 /* T > P. */
713 s = nthreads / team->prev_ts.place_partition_len;
714 k = 1;
716 break;
719 /* Increase the barrier threshold to make sure all new
720 threads and all the threads we're going to let die
721 arrive before the team is released. */
722 if (affinity_count)
723 gomp_simple_barrier_reinit (&pool->threads_dock,
724 nthreads + affinity_count);
728 if (i == nthreads)
729 goto do_release;
733 if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
735 long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
737 if (old_threads_used == 0)
738 --diff;
740 #ifdef HAVE_SYNC_BUILTINS
741 __sync_fetch_and_add (&gomp_managed_threads, diff);
742 #else
743 gomp_mutex_lock (&gomp_managed_threads_lock);
744 gomp_managed_threads += diff;
745 gomp_mutex_unlock (&gomp_managed_threads_lock);
746 #endif
749 attr = &gomp_thread_attr;
750 if (__builtin_expect (gomp_places_list != NULL, 0))
752 size_t stacksize;
753 pthread_attr_init (&thread_attr);
754 if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
755 pthread_attr_setstacksize (&thread_attr, stacksize);
756 attr = &thread_attr;
/* One start record for each member from I up to NTHREADS - 1, i.e. for
   every thread the launch loop below may have to create.  */
759 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
760 * (nthreads - i));
762 /* Launch new threads. */
763 for (; i < nthreads; ++i)
765 int err;
767 start_data->ts.place_partition_off = thr->ts.place_partition_off;
768 start_data->ts.place_partition_len = thr->ts.place_partition_len;
769 start_data->place = 0;
770 if (__builtin_expect (gomp_places_list != NULL, 0))
772 switch (bind)
774 case omp_proc_bind_true:
775 case omp_proc_bind_close:
776 if (k == s)
778 ++p;
779 if (p == (team->prev_ts.place_partition_off
780 + team->prev_ts.place_partition_len))
781 p = team->prev_ts.place_partition_off;
782 k = 1;
783 if (i == nthreads - rest)
784 s = 1;
786 else
787 ++k;
788 break;
789 case omp_proc_bind_master:
790 break;
791 case omp_proc_bind_spread:
792 if (k == 0)
794 /* T <= P. */
795 if (p < rest)
796 p += s + 1;
797 else
798 p += s;
799 if (p == (team->prev_ts.place_partition_off
800 + team->prev_ts.place_partition_len))
801 p = team->prev_ts.place_partition_off;
802 start_data->ts.place_partition_off = p;
803 if (p < rest)
804 start_data->ts.place_partition_len = s + 1;
805 else
806 start_data->ts.place_partition_len = s;
808 else
810 /* T > P. */
811 if (k == s)
813 ++p;
814 if (p == (team->prev_ts.place_partition_off
815 + team->prev_ts.place_partition_len))
816 p = team->prev_ts.place_partition_off;
817 k = 1;
818 if (i == nthreads - rest)
819 s = 1;
821 else
822 ++k;
823 start_data->ts.place_partition_off = p;
824 start_data->ts.place_partition_len = 1;
826 break;
828 start_data->place = p + 1;
829 if (affinity_thr != NULL && pool->threads[i] != NULL)
830 continue;
831 gomp_init_thread_affinity (attr, p);
834 start_data->fn = fn;
835 start_data->fn_data = data;
836 start_data->ts.team = team;
837 start_data->ts.work_share = &team->work_shares[0];
838 start_data->ts.last_work_share = NULL;
839 start_data->ts.team_id = i;
840 start_data->ts.level = team->prev_ts.level + 1;
841 start_data->ts.active_level = thr->ts.active_level;
842 start_data->ts.def_allocator = thr->ts.def_allocator;
843 #ifdef HAVE_SYNC_BUILTINS
844 start_data->ts.single_count = 0;
845 #endif
846 start_data->ts.static_trip = 0;
847 start_data->num_teams = thr->num_teams;
848 start_data->team_num = thr->team_num;
849 start_data->task = &team->implicit_task[i];
850 gomp_init_task (start_data->task, task, icv);
851 team->implicit_task[i].icv.nthreads_var = nthreads_var;
852 team->implicit_task[i].icv.bind_var = bind_var;
853 start_data->task->taskgroup = taskgroup;
854 start_data->thread_pool = pool;
855 start_data->nested = nested;
857 attr = gomp_adjust_thread_attr (attr, &thread_attr);
858 err = pthread_create (&start_data->handle, attr, gomp_thread_start,
859 start_data);
860 start_data++;
861 if (err != 0)
862 gomp_fatal ("Thread creation failed: %s", strerror (err));
865 if (__builtin_expect (attr == &thread_attr, 0))
866 pthread_attr_destroy (&thread_attr);
868 do_release:
869 if (nested)
870 gomp_barrier_wait (&team->barrier);
871 else
872 gomp_simple_barrier_wait (&pool->threads_dock);
874 /* Decrease the barrier threshold to match the number of threads
875 that should arrive back at the end of this team. The extra
876 threads should be exiting. Note that we arrange for this test
877 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
878 the barrier as well as gomp_managed_threads was temporarily
879 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
880 AFFINITY_COUNT if non-zero will be always at least
881 OLD_THREADS_COUNT - NTHREADS. */
882 if (__builtin_expect (nthreads < old_threads_used, 0)
883 || __builtin_expect (affinity_count, 0))
885 long diff = (long) nthreads - (long) old_threads_used;
887 if (affinity_count)
888 diff = -affinity_count;
890 gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
892 #ifdef HAVE_SYNC_BUILTINS
893 __sync_fetch_and_add (&gomp_managed_threads, diff);
894 #else
895 gomp_mutex_lock (&gomp_managed_threads_lock);
896 gomp_managed_threads += diff;
897 gomp_mutex_unlock (&gomp_managed_threads_lock);
898 #endif
900 if (__builtin_expect (gomp_display_affinity_var, 0))
902 if (nested
903 || nthreads != old_threads_used
904 || force_display)
906 gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
907 thr->place);
908 if (nested)
/* Rewind START_DATA to the first record; the launch loop advanced it
   once for every thread it created.  */
910 start_data -= nthreads - 1;
911 for (i = 1; i < nthreads; ++i)
913 gomp_display_affinity_thread (
914 #ifdef LIBGOMP_USE_PTHREADS
915 start_data->handle,
916 #else
917 gomp_thread_self (),
918 #endif
919 &start_data->ts,
920 start_data->place);
921 start_data++;
924 else
926 for (i = 1; i < nthreads; ++i)
928 gomp_thread_handle handle
929 = gomp_thread_to_pthread_t (pool->threads[i]);
930 gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
931 pool->threads[i]->place);
/* AFFINITY_THR was heap-allocated only when the partition has more than
   64 places (otherwise it came from gomp_alloca), hence the same test
   guards the free.  */
936 if (__builtin_expect (affinity_thr != NULL, 0)
937 && team->prev_ts.place_partition_len > 64)
938 free (affinity_thr);
940 #endif
943 /* Terminate the current team. This is only to be called by the master
944 thread. We assume that we must wait for the other threads. */
946 void
947 gomp_team_end (void)
949 struct gomp_thread *thr = gomp_thread ();
950 struct gomp_team *team = thr->ts.team;
952 /* This barrier handles all pending explicit threads.
953 As #pragma omp cancel parallel might get awaited count in
954 team->barrier in a inconsistent state, we need to use a different
955 counter here. */
956 gomp_team_barrier_wait_final (&team->barrier);
957 if (__builtin_expect (team->team_cancelled, 0))
959 struct gomp_work_share *ws = team->work_shares_to_free;
962 struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
963 if (next_ws == NULL)
964 gomp_ptrlock_set (&ws->next_ws, ws);
965 gomp_fini_work_share (ws);
966 ws = next_ws;
968 while (ws != NULL);
970 else
971 gomp_fini_work_share (thr->ts.work_share);
973 gomp_end_task ();
974 thr->ts = team->prev_ts;
976 if (__builtin_expect (thr->ts.level != 0, 0))
978 #ifdef HAVE_SYNC_BUILTINS
979 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
980 #else
981 gomp_mutex_lock (&gomp_managed_threads_lock);
982 gomp_managed_threads -= team->nthreads - 1L;
983 gomp_mutex_unlock (&gomp_managed_threads_lock);
984 #endif
985 /* This barrier has gomp_barrier_wait_last counterparts
986 and ensures the team can be safely destroyed. */
987 gomp_barrier_wait (&team->barrier);
990 if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
992 struct gomp_work_share *ws = team->work_shares[0].next_alloc;
995 struct gomp_work_share *next_ws = ws->next_alloc;
996 free (ws);
997 ws = next_ws;
999 while (ws != NULL);
1001 gomp_sem_destroy (&team->master_release);
1003 if (__builtin_expect (thr->ts.team != NULL, 0)
1004 || __builtin_expect (team->nthreads == 1, 0))
1005 free_team (team);
1006 else
1008 struct gomp_thread_pool *pool = thr->thread_pool;
1009 if (pool->last_team)
1010 free_team (pool->last_team);
1011 pool->last_team = team;
1012 gomp_release_thread_pool (pool);
1016 #ifdef LIBGOMP_USE_PTHREADS
1018 /* Constructors for this file. */
1020 static void __attribute__((constructor))
1021 initialize_team (void)
1023 #if !defined HAVE_TLS && !defined USE_EMUTLS
1024 static struct gomp_thread initial_thread_tls_data;
1026 pthread_key_create (&gomp_tls_key, NULL);
1027 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
1028 #endif
1030 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
1031 gomp_fatal ("could not create thread pool destructor.");
1034 static void __attribute__((destructor))
1035 team_destructor (void)
1037 /* Without this dlclose on libgomp could lead to subsequent
1038 crashes. */
1039 pthread_key_delete (gomp_thread_destructor);
1042 /* Similar to gomp_free_pool_helper, but don't detach itself,
1043 gomp_pause_host will pthread_join those threads. */
1045 static void
1046 gomp_pause_pool_helper (void *thread_pool)
1048 struct gomp_thread *thr = gomp_thread ();
1049 struct gomp_thread_pool *pool
1050 = (struct gomp_thread_pool *) thread_pool;
1051 gomp_simple_barrier_wait_last (&pool->threads_dock);
1052 gomp_sem_destroy (&thr->release);
1053 thr->thread_pool = NULL;
1054 thr->task = NULL;
1055 pthread_exit (NULL);
1058 /* Free a thread pool and release its threads. Return non-zero on
1059 failure. */
1062 gomp_pause_host (void)
1064 struct gomp_thread *thr = gomp_thread ();
1065 struct gomp_thread_pool *pool = thr->thread_pool;
1066 if (thr->ts.level)
1067 return -1;
1068 if (pool)
1070 if (pool->threads_used > 0)
1072 int i;
1073 pthread_t *thrs
1074 = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
1075 for (i = 1; i < pool->threads_used; i++)
1077 struct gomp_thread *nthr = pool->threads[i];
1078 nthr->fn = gomp_pause_pool_helper;
1079 nthr->data = pool;
1080 thrs[i] = gomp_thread_to_pthread_t (nthr);
1082 /* This barrier undocks threads docked on pool->threads_dock. */
1083 gomp_simple_barrier_wait (&pool->threads_dock);
1084 /* And this waits till all threads have called gomp_barrier_wait_last
1085 in gomp_pause_pool_helper. */
1086 gomp_simple_barrier_wait (&pool->threads_dock);
1087 /* Now it is safe to destroy the barrier and free the pool. */
1088 gomp_simple_barrier_destroy (&pool->threads_dock);
1090 #ifdef HAVE_SYNC_BUILTINS
1091 __sync_fetch_and_add (&gomp_managed_threads,
1092 1L - pool->threads_used);
1093 #else
1094 gomp_mutex_lock (&gomp_managed_threads_lock);
1095 gomp_managed_threads -= pool->threads_used - 1L;
1096 gomp_mutex_unlock (&gomp_managed_threads_lock);
1097 #endif
1098 for (i = 1; i < pool->threads_used; i++)
1099 pthread_join (thrs[i], NULL);
1101 if (pool->last_team)
1102 free_team (pool->last_team);
1103 #ifndef __nvptx__
1104 team_free (pool->threads);
1105 team_free (pool);
1106 #endif
1107 thr->thread_pool = NULL;
1109 return 0;
1111 #endif
1113 struct gomp_task_icv *
1114 gomp_new_icv (void)
1116 struct gomp_thread *thr = gomp_thread ();
1117 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
1118 gomp_init_task (task, NULL, &gomp_global_icv);
1119 thr->task = task;
1120 #ifdef LIBGOMP_USE_PTHREADS
1121 pthread_setspecific (gomp_thread_destructor, thr);
1122 #endif
1123 return &task->icv;