1 /* Copyright (C) 2005-2023 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of threads in response to team
27 creation and termination. */
34 #ifdef LIBGOMP_USE_PTHREADS
35 pthread_attr_t gomp_thread_attr
;
37 /* This key is for the thread destructor. */
38 pthread_key_t gomp_thread_destructor
;
41 /* This is the libgomp per-thread data structure. */
42 #if defined HAVE_TLS || defined USE_EMUTLS
43 __thread
struct gomp_thread gomp_tls_data
;
45 pthread_key_t gomp_tls_key
;
49 /* This structure is used to communicate across pthread_create. */
51 struct gomp_thread_start_data
55 struct gomp_team_state ts
;
56 struct gomp_task
*task
;
57 struct gomp_thread_pool
*thread_pool
;
59 unsigned int num_teams
;
60 unsigned int team_num
;
66 /* This function is a pthread_create entry point. This contains the idle
67 loop in which a thread waits to be called up to become part of a team. */
70 gomp_thread_start (void *xdata
)
72 struct gomp_thread_start_data
*data
= xdata
;
73 struct gomp_thread
*thr
;
74 struct gomp_thread_pool
*pool
;
75 void (*local_fn
) (void *);
78 #if defined HAVE_TLS || defined USE_EMUTLS
81 struct gomp_thread local_thr
;
84 gomp_sem_init (&thr
->release
, 0);
86 /* Extract what we need from data. */
88 local_data
= data
->fn_data
;
89 thr
->thread_pool
= data
->thread_pool
;
91 thr
->task
= data
->task
;
92 thr
->place
= data
->place
;
93 thr
->num_teams
= data
->num_teams
;
94 thr
->team_num
= data
->team_num
;
95 #ifdef GOMP_NEEDS_THREAD_HANDLE
96 thr
->handle
= data
->handle
;
98 #if !(defined HAVE_TLS || defined USE_EMUTLS)
99 pthread_setspecific (gomp_tls_key
, thr
);
102 thr
->ts
.team
->ordered_release
[thr
->ts
.team_id
] = &thr
->release
;
104 /* Make thread pool local. */
105 pool
= thr
->thread_pool
;
109 struct gomp_team
*team
= thr
->ts
.team
;
110 struct gomp_task
*task
= thr
->task
;
112 gomp_barrier_wait (&team
->barrier
);
114 local_fn (local_data
);
115 gomp_team_barrier_wait_final (&team
->barrier
);
116 gomp_finish_task (task
);
117 gomp_barrier_wait_last (&team
->barrier
);
121 pool
->threads
[thr
->ts
.team_id
] = thr
;
123 gomp_simple_barrier_wait (&pool
->threads_dock
);
126 struct gomp_team
*team
= thr
->ts
.team
;
127 struct gomp_task
*task
= thr
->task
;
129 local_fn (local_data
);
130 gomp_team_barrier_wait_final (&team
->barrier
);
131 gomp_finish_task (task
);
133 gomp_simple_barrier_wait (&pool
->threads_dock
);
136 local_data
= thr
->data
;
142 gomp_sem_destroy (&thr
->release
);
143 pthread_detach (pthread_self ());
144 thr
->thread_pool
= NULL
;
150 static inline struct gomp_team
*
151 get_last_team (unsigned nthreads
)
153 struct gomp_thread
*thr
= gomp_thread ();
154 if (thr
->ts
.team
== NULL
)
156 struct gomp_thread_pool
*pool
= gomp_get_thread_pool (thr
, nthreads
);
157 struct gomp_team
*last_team
= pool
->last_team
;
158 if (last_team
!= NULL
&& last_team
->nthreads
== nthreads
)
160 pool
->last_team
= NULL
;
167 /* Create a new team data structure. */
170 gomp_new_team (unsigned nthreads
)
172 struct gomp_team
*team
;
175 team
= get_last_team (nthreads
);
178 size_t extra
= sizeof (team
->ordered_release
[0])
179 + sizeof (team
->implicit_task
[0]);
180 #ifdef GOMP_USE_ALIGNED_WORK_SHARES
181 team
= gomp_aligned_alloc (__alignof (struct gomp_team
),
182 sizeof (*team
) + nthreads
* extra
);
184 team
= team_malloc (sizeof (*team
) + nthreads
* extra
);
187 #ifndef HAVE_SYNC_BUILTINS
188 gomp_mutex_init (&team
->work_share_list_free_lock
);
190 gomp_barrier_init (&team
->barrier
, nthreads
);
191 gomp_mutex_init (&team
->task_lock
);
193 team
->nthreads
= nthreads
;
196 team
->work_share_chunk
= 8;
197 #ifdef HAVE_SYNC_BUILTINS
198 team
->single_count
= 0;
200 team
->work_shares_to_free
= &team
->work_shares
[0];
201 gomp_init_work_share (&team
->work_shares
[0], 0, nthreads
);
202 team
->work_shares
[0].next_alloc
= NULL
;
203 team
->work_share_list_free
= NULL
;
204 team
->work_share_list_alloc
= &team
->work_shares
[1];
205 for (i
= 1; i
< 7; i
++)
206 team
->work_shares
[i
].next_free
= &team
->work_shares
[i
+ 1];
207 team
->work_shares
[i
].next_free
= NULL
;
209 gomp_sem_init (&team
->master_release
, 0);
210 team
->ordered_release
= (void *) &team
->implicit_task
[nthreads
];
211 team
->ordered_release
[0] = &team
->master_release
;
213 priority_queue_init (&team
->task_queue
);
214 team
->task_count
= 0;
215 team
->task_queued_count
= 0;
216 team
->task_running_count
= 0;
217 team
->work_share_cancelled
= 0;
218 team
->team_cancelled
= 0;
220 team
->task_detach_count
= 0;
226 /* Free a team data structure. */
229 free_team (struct gomp_team
*team
)
231 #ifndef HAVE_SYNC_BUILTINS
232 gomp_mutex_destroy (&team
->work_share_list_free_lock
);
234 gomp_barrier_destroy (&team
->barrier
);
235 gomp_mutex_destroy (&team
->task_lock
);
236 priority_queue_free (&team
->task_queue
);
241 gomp_free_pool_helper (void *thread_pool
)
243 struct gomp_thread
*thr
= gomp_thread ();
244 struct gomp_thread_pool
*pool
245 = (struct gomp_thread_pool
*) thread_pool
;
246 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
247 gomp_sem_destroy (&thr
->release
);
248 thr
->thread_pool
= NULL
;
250 #ifdef LIBGOMP_USE_PTHREADS
251 pthread_detach (pthread_self ());
253 #elif defined(__nvptx__)
255 #elif defined(__AMDGCN__)
256 asm ("s_dcache_wb\n\t"
259 #error gomp_free_pool_helper must terminate the thread
263 /* Free a thread pool and release its threads. */
266 gomp_free_thread (void *arg
__attribute__((unused
)))
268 struct gomp_thread
*thr
= gomp_thread ();
269 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
272 if (pool
->threads_used
> 0)
275 for (i
= 1; i
< pool
->threads_used
; i
++)
277 struct gomp_thread
*nthr
= pool
->threads
[i
];
278 nthr
->fn
= gomp_free_pool_helper
;
281 /* This barrier undocks threads docked on pool->threads_dock. */
282 gomp_simple_barrier_wait (&pool
->threads_dock
);
283 /* And this waits till all threads have called
284 gomp_simple_barrier_wait_last in gomp_free_pool_helper. */
285 gomp_simple_barrier_wait (&pool
->threads_dock
);
286 /* Now it is safe to destroy the barrier and free the pool. */
287 gomp_simple_barrier_destroy (&pool
->threads_dock
);
289 #ifdef HAVE_SYNC_BUILTINS
290 __sync_fetch_and_add (&gomp_managed_threads
,
291 1L - pool
->threads_used
);
293 gomp_mutex_lock (&gomp_managed_threads_lock
);
294 gomp_managed_threads
-= pool
->threads_used
- 1L;
295 gomp_mutex_unlock (&gomp_managed_threads_lock
);
299 free_team (pool
->last_team
);
301 team_free (pool
->threads
);
304 thr
->thread_pool
= NULL
;
306 if (thr
->ts
.level
== 0 && __builtin_expect (thr
->ts
.team
!= NULL
, 0))
308 if (thr
->task
!= NULL
)
310 struct gomp_task
*task
= thr
->task
;
318 #ifdef LIBGOMP_USE_PTHREADS
320 gomp_team_start (void (*fn
) (void *), void *data
, unsigned nthreads
,
321 unsigned flags
, struct gomp_team
*team
,
322 struct gomp_taskgroup
*taskgroup
)
324 struct gomp_thread_start_data
*start_data
= NULL
;
325 struct gomp_thread
*thr
, *nthr
;
326 struct gomp_task
*task
;
327 struct gomp_task_icv
*icv
;
329 struct gomp_thread_pool
*pool
;
330 unsigned i
, n
, old_threads_used
= 0;
331 pthread_attr_t thread_attr
, *attr
;
332 unsigned long nthreads_var
;
334 unsigned int s
= 0, rest
= 0, p
= 0, k
= 0;
335 unsigned int affinity_count
= 0;
336 struct gomp_thread
**affinity_thr
= NULL
;
337 bool force_display
= false;
339 thr
= gomp_thread ();
340 nested
= thr
->ts
.level
;
341 pool
= thr
->thread_pool
;
343 icv
= task
? &task
->icv
: &gomp_global_icv
;
344 if (__builtin_expect (gomp_places_list
!= NULL
, 0) && thr
->place
== 0)
346 gomp_init_affinity ();
347 if (__builtin_expect (gomp_display_affinity_var
, 0) && nthreads
== 1)
348 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
352 /* Always save the previous state, even if this isn't a nested team.
353 In particular, we should save any work share state from an outer
354 orphaned work share construct. */
355 team
->prev_ts
= thr
->ts
;
361 ++thr
->ts
.active_level
;
362 thr
->ts
.work_share
= &team
->work_shares
[0];
363 thr
->ts
.last_work_share
= NULL
;
364 #ifdef HAVE_SYNC_BUILTINS
365 thr
->ts
.single_count
= 0;
367 thr
->ts
.static_trip
= 0;
368 thr
->task
= &team
->implicit_task
[0];
369 #ifdef GOMP_NEEDS_THREAD_HANDLE
370 thr
->handle
= pthread_self ();
372 nthreads_var
= icv
->nthreads_var
;
373 if (__builtin_expect (gomp_nthreads_var_list
!= NULL
, 0)
374 && thr
->ts
.level
< gomp_nthreads_var_list_len
)
375 nthreads_var
= gomp_nthreads_var_list
[thr
->ts
.level
];
376 bind_var
= icv
->bind_var
;
377 if (bind_var
!= omp_proc_bind_false
&& (flags
& 7) != omp_proc_bind_false
)
378 bind_var
= flags
& 7;
380 if (__builtin_expect (gomp_bind_var_list
!= NULL
, 0)
381 && thr
->ts
.level
< gomp_bind_var_list_len
)
382 bind_var
= gomp_bind_var_list
[thr
->ts
.level
];
383 gomp_init_task (thr
->task
, task
, icv
);
384 thr
->task
->taskgroup
= taskgroup
;
385 team
->implicit_task
[0].icv
.nthreads_var
= nthreads_var
;
386 team
->implicit_task
[0].icv
.bind_var
= bind_var
;
393 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
395 /* Depending on chosen proc_bind model, set subpartition
396 for the master thread and initialize helper variables
397 P and optionally S, K and/or REST used by later place
398 computation for each additional thread. */
402 case omp_proc_bind_true
:
403 case omp_proc_bind_close
:
404 if (nthreads
> thr
->ts
.place_partition_len
)
406 /* T > P. S threads will be placed in each place,
407 and the final REST threads placed one by one
408 into the already occupied places. */
409 s
= nthreads
/ thr
->ts
.place_partition_len
;
410 rest
= nthreads
% thr
->ts
.place_partition_len
;
416 case omp_proc_bind_master
:
417 /* Each thread will be bound to master's place. */
419 case omp_proc_bind_spread
:
420 if (nthreads
<= thr
->ts
.place_partition_len
)
422 /* T <= P. Each subpartition will have in between s
423 and s+1 places (subpartitions starting at or
424 after rest will have s places, earlier s+1 places),
425 each thread will be bound to the first place in
426 its subpartition (except for the master thread
427 that can be bound to another place in its
429 s
= thr
->ts
.place_partition_len
/ nthreads
;
430 rest
= thr
->ts
.place_partition_len
% nthreads
;
431 rest
= (s
+ 1) * rest
+ thr
->ts
.place_partition_off
;
434 p
-= (p
- thr
->ts
.place_partition_off
) % (s
+ 1);
435 thr
->ts
.place_partition_len
= s
+ 1;
440 thr
->ts
.place_partition_len
= s
;
442 thr
->ts
.place_partition_off
= p
;
446 /* T > P. Each subpartition will have just a single
447 place and we'll place between s and s+1
448 threads into each subpartition. */
449 s
= nthreads
/ thr
->ts
.place_partition_len
;
450 rest
= nthreads
% thr
->ts
.place_partition_len
;
451 thr
->ts
.place_partition_off
= p
;
452 thr
->ts
.place_partition_len
= 1;
459 bind
= omp_proc_bind_false
;
461 /* We only allow the reuse of idle threads for non-nested PARALLEL
462 regions. This appears to be implied by the semantics of
463 threadprivate variables, but perhaps that's reading too much into
464 things. Certainly it does prevent any locking problems, since
465 only the initial program thread will modify gomp_threads. */
468 old_threads_used
= pool
->threads_used
;
470 if (nthreads
<= old_threads_used
)
472 else if (old_threads_used
== 0)
475 gomp_simple_barrier_init (&pool
->threads_dock
, nthreads
);
479 n
= old_threads_used
;
481 /* Increase the barrier threshold to make sure all new
482 threads arrive before the team is released. */
483 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
486 /* Not true yet, but soon will be. We're going to release all
487 threads from the dock, and those that aren't part of the
489 pool
->threads_used
= nthreads
;
491 /* If necessary, expand the size of the pool->threads array. It is
492 expected that changes in the number of threads are rare, thus we
493 make no effort to expand pool->threads_size geometrically. */
494 if (nthreads
>= pool
->threads_size
)
496 pool
->threads_size
= nthreads
+ 1;
498 = gomp_realloc (pool
->threads
,
500 * sizeof (struct gomp_thread
*));
501 /* Add current (master) thread to threads[]. */
502 pool
->threads
[0] = thr
;
505 /* Release existing idle threads. */
508 unsigned int place_partition_off
= thr
->ts
.place_partition_off
;
509 unsigned int place_partition_len
= thr
->ts
.place_partition_len
;
510 unsigned int place
= 0;
511 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
515 case omp_proc_bind_true
:
516 case omp_proc_bind_close
:
520 if (p
== (team
->prev_ts
.place_partition_off
521 + team
->prev_ts
.place_partition_len
))
522 p
= team
->prev_ts
.place_partition_off
;
524 if (i
== nthreads
- rest
)
530 case omp_proc_bind_master
:
532 case omp_proc_bind_spread
:
540 if (p
== (team
->prev_ts
.place_partition_off
541 + team
->prev_ts
.place_partition_len
))
542 p
= team
->prev_ts
.place_partition_off
;
543 place_partition_off
= p
;
545 place_partition_len
= s
+ 1;
547 place_partition_len
= s
;
555 if (p
== (team
->prev_ts
.place_partition_off
556 + team
->prev_ts
.place_partition_len
))
557 p
= team
->prev_ts
.place_partition_off
;
559 if (i
== nthreads
- rest
)
564 place_partition_off
= p
;
565 place_partition_len
= 1;
569 if (affinity_thr
!= NULL
570 || (bind
!= omp_proc_bind_true
571 && pool
->threads
[i
]->place
!= p
+ 1)
572 || pool
->threads
[i
]->place
<= place_partition_off
573 || pool
->threads
[i
]->place
> (place_partition_off
574 + place_partition_len
))
577 force_display
= true;
578 if (affinity_thr
== NULL
)
582 if (team
->prev_ts
.place_partition_len
> 64)
584 = gomp_malloc (team
->prev_ts
.place_partition_len
585 * sizeof (struct gomp_thread
*));
588 = gomp_alloca (team
->prev_ts
.place_partition_len
589 * sizeof (struct gomp_thread
*));
590 memset (affinity_thr
, '\0',
591 team
->prev_ts
.place_partition_len
592 * sizeof (struct gomp_thread
*));
593 for (j
= i
; j
< old_threads_used
; j
++)
595 if (pool
->threads
[j
]->place
596 > team
->prev_ts
.place_partition_off
597 && (pool
->threads
[j
]->place
598 <= (team
->prev_ts
.place_partition_off
599 + team
->prev_ts
.place_partition_len
)))
601 l
= pool
->threads
[j
]->place
- 1
602 - team
->prev_ts
.place_partition_off
;
603 pool
->threads
[j
]->data
= affinity_thr
[l
];
604 affinity_thr
[l
] = pool
->threads
[j
];
606 pool
->threads
[j
] = NULL
;
608 if (nthreads
> old_threads_used
)
609 memset (&pool
->threads
[old_threads_used
],
610 '\0', ((nthreads
- old_threads_used
)
611 * sizeof (struct gomp_thread
*)));
613 affinity_count
= old_threads_used
- i
;
615 if (affinity_count
== 0)
618 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
621 if (bind
!= omp_proc_bind_true
)
623 for (l
= place_partition_off
;
624 l
< place_partition_off
+ place_partition_len
;
626 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
629 if (l
== place_partition_off
+ place_partition_len
)
632 nthr
= affinity_thr
[l
- team
->prev_ts
.place_partition_off
];
633 affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
634 = (struct gomp_thread
*) nthr
->data
;
636 pool
->threads
[i
] = nthr
;
639 nthr
= pool
->threads
[i
];
643 nthr
= pool
->threads
[i
];
644 nthr
->ts
.team
= team
;
645 nthr
->ts
.work_share
= &team
->work_shares
[0];
646 nthr
->ts
.last_work_share
= NULL
;
647 nthr
->ts
.team_id
= i
;
648 nthr
->ts
.level
= team
->prev_ts
.level
+ 1;
649 nthr
->ts
.active_level
= thr
->ts
.active_level
;
650 nthr
->ts
.place_partition_off
= place_partition_off
;
651 nthr
->ts
.place_partition_len
= place_partition_len
;
652 nthr
->ts
.def_allocator
= thr
->ts
.def_allocator
;
653 #ifdef HAVE_SYNC_BUILTINS
654 nthr
->ts
.single_count
= 0;
656 nthr
->ts
.static_trip
= 0;
657 nthr
->num_teams
= thr
->num_teams
;
658 nthr
->team_num
= thr
->team_num
;
659 nthr
->task
= &team
->implicit_task
[i
];
661 gomp_init_task (nthr
->task
, task
, icv
);
662 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
663 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
664 nthr
->task
->taskgroup
= taskgroup
;
667 team
->ordered_release
[i
] = &nthr
->release
;
670 if (__builtin_expect (affinity_thr
!= NULL
, 0))
672 /* If AFFINITY_THR is non-NULL just because we had to
673 permute some threads in the pool, but we've managed
674 to find exactly as many old threads as we'd find
675 without affinity, we don't need to handle this
676 specially anymore. */
677 if (nthreads
<= old_threads_used
678 ? (affinity_count
== old_threads_used
- nthreads
)
679 : (i
== old_threads_used
))
681 if (team
->prev_ts
.place_partition_len
> 64)
689 /* We are going to compute the places/subpartitions
690 again from the beginning. So, we need to reinitialize
691 vars modified by the switch (bind) above inside
692 of the loop, to the state they had after the initial
696 case omp_proc_bind_true
:
697 case omp_proc_bind_close
:
698 if (nthreads
> thr
->ts
.place_partition_len
)
699 /* T > P. S has been changed, so needs
701 s
= nthreads
/ thr
->ts
.place_partition_len
;
705 case omp_proc_bind_master
:
706 /* No vars have been changed. */
708 case omp_proc_bind_spread
:
709 p
= thr
->ts
.place_partition_off
;
713 s
= nthreads
/ team
->prev_ts
.place_partition_len
;
719 /* Increase the barrier threshold to make sure all new
720 threads and all the threads we're going to let die
721 arrive before the team is released. */
723 gomp_simple_barrier_reinit (&pool
->threads_dock
,
724 nthreads
+ affinity_count
);
733 if (__builtin_expect (nthreads
+ affinity_count
> old_threads_used
, 0))
735 long diff
= (long) (nthreads
+ affinity_count
) - (long) old_threads_used
;
737 if (old_threads_used
== 0)
740 #ifdef HAVE_SYNC_BUILTINS
741 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
743 gomp_mutex_lock (&gomp_managed_threads_lock
);
744 gomp_managed_threads
+= diff
;
745 gomp_mutex_unlock (&gomp_managed_threads_lock
);
749 attr
= &gomp_thread_attr
;
750 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
753 pthread_attr_init (&thread_attr
);
754 if (! pthread_attr_getstacksize (&gomp_thread_attr
, &stacksize
))
755 pthread_attr_setstacksize (&thread_attr
, stacksize
);
759 start_data
= gomp_alloca (sizeof (struct gomp_thread_start_data
)
762 /* Launch new threads. */
763 for (; i
< nthreads
; ++i
)
767 start_data
->ts
.place_partition_off
= thr
->ts
.place_partition_off
;
768 start_data
->ts
.place_partition_len
= thr
->ts
.place_partition_len
;
769 start_data
->place
= 0;
770 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
774 case omp_proc_bind_true
:
775 case omp_proc_bind_close
:
779 if (p
== (team
->prev_ts
.place_partition_off
780 + team
->prev_ts
.place_partition_len
))
781 p
= team
->prev_ts
.place_partition_off
;
783 if (i
== nthreads
- rest
)
789 case omp_proc_bind_master
:
791 case omp_proc_bind_spread
:
799 if (p
== (team
->prev_ts
.place_partition_off
800 + team
->prev_ts
.place_partition_len
))
801 p
= team
->prev_ts
.place_partition_off
;
802 start_data
->ts
.place_partition_off
= p
;
804 start_data
->ts
.place_partition_len
= s
+ 1;
806 start_data
->ts
.place_partition_len
= s
;
814 if (p
== (team
->prev_ts
.place_partition_off
815 + team
->prev_ts
.place_partition_len
))
816 p
= team
->prev_ts
.place_partition_off
;
818 if (i
== nthreads
- rest
)
823 start_data
->ts
.place_partition_off
= p
;
824 start_data
->ts
.place_partition_len
= 1;
828 start_data
->place
= p
+ 1;
829 if (affinity_thr
!= NULL
&& pool
->threads
[i
] != NULL
)
831 gomp_init_thread_affinity (attr
, p
);
835 start_data
->fn_data
= data
;
836 start_data
->ts
.team
= team
;
837 start_data
->ts
.work_share
= &team
->work_shares
[0];
838 start_data
->ts
.last_work_share
= NULL
;
839 start_data
->ts
.team_id
= i
;
840 start_data
->ts
.level
= team
->prev_ts
.level
+ 1;
841 start_data
->ts
.active_level
= thr
->ts
.active_level
;
842 start_data
->ts
.def_allocator
= thr
->ts
.def_allocator
;
843 #ifdef HAVE_SYNC_BUILTINS
844 start_data
->ts
.single_count
= 0;
846 start_data
->ts
.static_trip
= 0;
847 start_data
->num_teams
= thr
->num_teams
;
848 start_data
->team_num
= thr
->team_num
;
849 start_data
->task
= &team
->implicit_task
[i
];
850 gomp_init_task (start_data
->task
, task
, icv
);
851 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
852 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
853 start_data
->task
->taskgroup
= taskgroup
;
854 start_data
->thread_pool
= pool
;
855 start_data
->nested
= nested
;
857 attr
= gomp_adjust_thread_attr (attr
, &thread_attr
);
858 err
= pthread_create (&start_data
->handle
, attr
, gomp_thread_start
,
862 gomp_fatal ("Thread creation failed: %s", strerror (err
));
865 if (__builtin_expect (attr
== &thread_attr
, 0))
866 pthread_attr_destroy (&thread_attr
);
870 gomp_barrier_wait (&team
->barrier
);
872 gomp_simple_barrier_wait (&pool
->threads_dock
);
874 /* Decrease the barrier threshold to match the number of threads
875 that should arrive back at the end of this team. The extra
876 threads should be exiting. Note that we arrange for this test
877 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
878 the barrier as well as gomp_managed_threads was temporarily
879 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_USED,
880 AFFINITY_COUNT if non-zero will be always at least
881 OLD_THREADS_USED - NTHREADS. */
882 if (__builtin_expect (nthreads
< old_threads_used
, 0)
883 || __builtin_expect (affinity_count
, 0))
885 long diff
= (long) nthreads
- (long) old_threads_used
;
888 diff
= -affinity_count
;
890 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
892 #ifdef HAVE_SYNC_BUILTINS
893 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
895 gomp_mutex_lock (&gomp_managed_threads_lock
);
896 gomp_managed_threads
+= diff
;
897 gomp_mutex_unlock (&gomp_managed_threads_lock
);
900 if (__builtin_expect (gomp_display_affinity_var
, 0))
903 || nthreads
!= old_threads_used
906 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
910 start_data
-= nthreads
- 1;
911 for (i
= 1; i
< nthreads
; ++i
)
913 gomp_display_affinity_thread (
914 #ifdef LIBGOMP_USE_PTHREADS
926 for (i
= 1; i
< nthreads
; ++i
)
928 gomp_thread_handle handle
929 = gomp_thread_to_pthread_t (pool
->threads
[i
]);
930 gomp_display_affinity_thread (handle
, &pool
->threads
[i
]->ts
,
931 pool
->threads
[i
]->place
);
936 if (__builtin_expect (affinity_thr
!= NULL
, 0)
937 && team
->prev_ts
.place_partition_len
> 64)
943 /* Terminate the current team. This is only to be called by the master
944 thread. We assume that we must wait for the other threads. */
949 struct gomp_thread
*thr
= gomp_thread ();
950 struct gomp_team
*team
= thr
->ts
.team
;
952 /* This barrier handles all pending explicit threads.
953 As #pragma omp cancel parallel might get awaited count in
954 team->barrier in an inconsistent state, we need to use a different
956 gomp_team_barrier_wait_final (&team
->barrier
);
957 if (__builtin_expect (team
->team_cancelled
, 0))
959 struct gomp_work_share
*ws
= team
->work_shares_to_free
;
962 struct gomp_work_share
*next_ws
= gomp_ptrlock_get (&ws
->next_ws
);
964 gomp_ptrlock_set (&ws
->next_ws
, ws
);
965 gomp_fini_work_share (ws
);
971 gomp_fini_work_share (thr
->ts
.work_share
);
974 thr
->ts
= team
->prev_ts
;
976 if (__builtin_expect (thr
->ts
.level
!= 0, 0))
978 #ifdef HAVE_SYNC_BUILTINS
979 __sync_fetch_and_add (&gomp_managed_threads
, 1L - team
->nthreads
);
981 gomp_mutex_lock (&gomp_managed_threads_lock
);
982 gomp_managed_threads
-= team
->nthreads
- 1L;
983 gomp_mutex_unlock (&gomp_managed_threads_lock
);
985 /* This barrier has gomp_barrier_wait_last counterparts
986 and ensures the team can be safely destroyed. */
987 gomp_barrier_wait (&team
->barrier
);
990 if (__builtin_expect (team
->work_shares
[0].next_alloc
!= NULL
, 0))
992 struct gomp_work_share
*ws
= team
->work_shares
[0].next_alloc
;
995 struct gomp_work_share
*next_ws
= ws
->next_alloc
;
1001 gomp_sem_destroy (&team
->master_release
);
1003 if (__builtin_expect (thr
->ts
.team
!= NULL
, 0)
1004 || __builtin_expect (team
->nthreads
== 1, 0))
1008 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
1009 if (pool
->last_team
)
1010 free_team (pool
->last_team
);
1011 pool
->last_team
= team
;
1012 gomp_release_thread_pool (pool
);
1016 #ifdef LIBGOMP_USE_PTHREADS
1018 /* Constructors for this file. */
1020 static void __attribute__((constructor
))
1021 initialize_team (void)
1023 #if !defined HAVE_TLS && !defined USE_EMUTLS
1024 static struct gomp_thread initial_thread_tls_data
;
1026 pthread_key_create (&gomp_tls_key
, NULL
);
1027 pthread_setspecific (gomp_tls_key
, &initial_thread_tls_data
);
1030 if (pthread_key_create (&gomp_thread_destructor
, gomp_free_thread
) != 0)
1031 gomp_fatal ("could not create thread pool destructor.");
1034 static void __attribute__((destructor
))
1035 team_destructor (void)
1037 /* Without this dlclose on libgomp could lead to subsequent
1039 pthread_key_delete (gomp_thread_destructor
);
1042 /* Similar to gomp_free_pool_helper, but does not detach itself;
1043 gomp_pause_host will pthread_join those threads. */
1046 gomp_pause_pool_helper (void *thread_pool
)
1048 struct gomp_thread
*thr
= gomp_thread ();
1049 struct gomp_thread_pool
*pool
1050 = (struct gomp_thread_pool
*) thread_pool
;
1051 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
1052 gomp_sem_destroy (&thr
->release
);
1053 thr
->thread_pool
= NULL
;
1055 pthread_exit (NULL
);
1058 /* Free a thread pool and release its threads. Return non-zero on
1062 gomp_pause_host (void)
1064 struct gomp_thread
*thr
= gomp_thread ();
1065 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
1070 if (pool
->threads_used
> 0)
1074 = gomp_alloca (sizeof (pthread_t
) * pool
->threads_used
);
1075 for (i
= 1; i
< pool
->threads_used
; i
++)
1077 struct gomp_thread
*nthr
= pool
->threads
[i
];
1078 nthr
->fn
= gomp_pause_pool_helper
;
1080 thrs
[i
] = gomp_thread_to_pthread_t (nthr
);
1082 /* This barrier undocks threads docked on pool->threads_dock. */
1083 gomp_simple_barrier_wait (&pool
->threads_dock
);
1084 /* And this waits till all threads have called
1085 gomp_simple_barrier_wait_last in gomp_pause_pool_helper. */
1086 gomp_simple_barrier_wait (&pool
->threads_dock
);
1087 /* Now it is safe to destroy the barrier and free the pool. */
1088 gomp_simple_barrier_destroy (&pool
->threads_dock
);
1090 #ifdef HAVE_SYNC_BUILTINS
1091 __sync_fetch_and_add (&gomp_managed_threads
,
1092 1L - pool
->threads_used
);
1094 gomp_mutex_lock (&gomp_managed_threads_lock
);
1095 gomp_managed_threads
-= pool
->threads_used
- 1L;
1096 gomp_mutex_unlock (&gomp_managed_threads_lock
);
1098 for (i
= 1; i
< pool
->threads_used
; i
++)
1099 pthread_join (thrs
[i
], NULL
);
1101 if (pool
->last_team
)
1102 free_team (pool
->last_team
);
1104 team_free (pool
->threads
);
1107 thr
->thread_pool
= NULL
;
1113 struct gomp_task_icv
*
1116 struct gomp_thread
*thr
= gomp_thread ();
1117 struct gomp_task
*task
= gomp_malloc (sizeof (struct gomp_task
));
1118 gomp_init_task (task
, NULL
, &gomp_global_icv
);
1120 #ifdef LIBGOMP_USE_PTHREADS
1121 pthread_setspecific (gomp_thread_destructor
, thr
);