1 /* Copyright (C) 2005-2023 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file handles the maintenance of threads in response to team
27 creation and termination. */
34 #ifdef LIBGOMP_USE_PTHREADS
/* Attributes passed to pthread_create for worker threads (e.g. stack size).
   NOTE(review): initialization is not visible in this chunk — presumably done
   elsewhere in libgomp initialization code; confirm against the full file.  */
35 pthread_attr_t gomp_thread_attr
;
37 /* This key is for the thread destructor. */
/* Key whose destructor (gomp_free_thread, registered in initialize_team
   below) tears down a thread's pool when the thread exits.  */
38 pthread_key_t gomp_thread_destructor
;
41 /* This is the libgomp per-thread data structure. */
42 #if defined HAVE_TLS || defined USE_EMUTLS
/* With native or emulated TLS, per-thread state lives directly in a
   __thread variable.  */
43 __thread
struct gomp_thread gomp_tls_data
;
/* Without TLS support, per-thread state is reached through this pthread key
   instead.  NOTE(review): the #else separating the two cases is one of the
   lines missing from this extraction.  */
45 pthread_key_t gomp_tls_key
;
49 /* This structure is used to communicate across pthread_create. */
/* One instance per launched thread is filled in by gomp_team_start and read
   back by gomp_thread_start.  NOTE(review): several members referenced later
   in this file (fn, fn_data, place, nested, handle) are on lines missing
   from this extraction.  */
51 struct gomp_thread_start_data
/* Initial team state (team pointer, team_id, level, place partition).  */
55 struct gomp_team_state ts
;
/* Implicit task the new thread starts with.  */
56 struct gomp_task
*task
;
/* Pool the new thread docks into between parallel regions.  */
57 struct gomp_thread_pool
*thread_pool
;
/* Values copied into the new thread for omp_get_num_teams /
   omp_get_team_num support.  */
59 unsigned int num_teams
;
60 unsigned int team_num
;
66 /* This function is a pthread_create entry point. This contains the idle
67 loop in which a thread waits to be called up to become part of a team. */
/* NOTE(review): the return type line and several body lines (braces, the
   nested/non-nested branch headers, thread_pool initialization) are missing
   from this extraction; the structure below is a fragment.  */
70 gomp_thread_start (void *xdata
)
72 struct gomp_thread_start_data
*data
= xdata
;
73 struct gomp_thread
*thr
;
74 struct gomp_thread_pool
*pool
;
/* Outlined parallel-region body to invoke, taken from DATA.  */
75 void (*local_fn
) (void *);
78 #if defined HAVE_TLS || defined USE_EMUTLS
/* Without TLS this stack-local struct serves as the thread's gomp_thread;
   the TLS case presumably points thr at gomp_tls_data (missing lines).  */
81 struct gomp_thread local_thr
;
/* Semaphore other threads post to release this one (ordered sections).  */
84 gomp_sem_init (&thr
->release
, 0);
86 /* Extract what we need from data. */
/* DATA lives on the creator's stack (gomp_alloca in gomp_team_start), so
   everything needed must be copied out before the first barrier.  */
88 local_data
= data
->fn_data
;
89 thr
->thread_pool
= data
->thread_pool
;
91 thr
->task
= data
->task
;
92 thr
->place
= data
->place
;
93 thr
->num_teams
= data
->num_teams
;
94 thr
->team_num
= data
->team_num
;
95 #ifdef GOMP_NEEDS_THREAD_HANDLE
96 thr
->handle
= data
->handle
;
98 #if !(defined HAVE_TLS || defined USE_EMUTLS)
/* Publish this thread's state through the key when TLS is unavailable.  */
99 pthread_setspecific (gomp_tls_key
, thr
);
/* Register our release semaphore in the team's ordered_release table so
   ordered constructs can wake us.  */
102 thr
->ts
.team
->ordered_release
[thr
->ts
.team_id
] = &thr
->release
;
104 /* Make thread pool local. */
105 pool
= thr
->thread_pool
;
/* First branch (nested team, per the original file): run the region once
   and exit — nested threads are not reused.  NOTE(review): the enclosing
   if/else lines are missing from this extraction.  */
109 struct gomp_team
*team
= thr
->ts
.team
;
110 struct gomp_task
*task
= thr
->task
;
/* Wait for the whole team to be set up before running the body.  */
112 gomp_barrier_wait (&team
->barrier
);
114 local_fn (local_data
);
115 gomp_team_barrier_wait_final (&team
->barrier
);
116 gomp_finish_task (task
);
/* Counterpart of the gomp_barrier_wait in gomp_team_end; after this the
   team may be destroyed.  */
117 gomp_barrier_wait_last (&team
->barrier
);
/* Non-nested case: register in the pool and enter the idle loop, docking
   on threads_dock between parallel regions.  */
121 pool
->threads
[thr
->ts
.team_id
] = thr
;
123 gomp_simple_barrier_wait (&pool
->threads_dock
);
126 struct gomp_team
*team
= thr
->ts
.team
;
127 struct gomp_task
*task
= thr
->task
;
129 local_fn (local_data
);
130 gomp_team_barrier_wait_final (&team
->barrier
);
131 gomp_finish_task (task
);
/* Dock again and wait to be handed the next fn/data pair.  */
133 gomp_simple_barrier_wait (&pool
->threads_dock
);
136 local_data
= thr
->data
;
/* Shutdown path: fn was NULL (pool being freed), so clean up and detach.  */
142 gomp_sem_destroy (&thr
->release
);
143 pthread_detach (pthread_self ());
144 thr
->thread_pool
= NULL
;
/* Return the cached last team from this thread's pool if it has the right
   number of threads, clearing the cache; otherwise the caller allocates.
   Only applies to the outermost (non-nested) level: thr->ts.team == NULL.
   NOTE(review): the return statements and closing braces are missing from
   this extraction.  */
150 static inline struct gomp_team
*
151 get_last_team (unsigned nthreads
)
153 struct gomp_thread
*thr
= gomp_thread ();
154 if (thr
->ts
.team
== NULL
)
156 struct gomp_thread_pool
*pool
= gomp_get_thread_pool (thr
, nthreads
);
157 struct gomp_team
*last_team
= pool
->last_team
;
/* Reuse is only valid when the cached team was sized for exactly the same
   thread count.  */
158 if (last_team
!= NULL
&& last_team
->nthreads
== nthreads
)
160 pool
->last_team
= NULL
;
167 /* Create a new team data structure. */
/* Allocates (or reuses via get_last_team) a gomp_team sized for NTHREADS,
   with trailing per-thread ordered_release and implicit_task arrays, and
   initializes its barriers, locks, work shares and task bookkeeping.
   NOTE(review): the return type/closing lines and the branch around the
   get_last_team reuse are missing from this extraction.  */
170 gomp_new_team (unsigned nthreads
)
172 struct gomp_team
*team
;
175 team
= get_last_team (nthreads
);
/* Fresh allocation path: EXTRA is the per-thread tail storage — one
   ordered_release slot plus one implicit_task per thread.  */
178 size_t extra
= sizeof (team
->ordered_release
[0])
179 + sizeof (team
->implicit_task
[0]);
180 #ifdef GOMP_USE_ALIGNED_WORK_SHARES
181 team
= gomp_aligned_alloc (__alignof (struct gomp_team
),
182 sizeof (*team
) + nthreads
* extra
);
/* NOTE(review): this is the #else arm of the aligned-alloc choice; the
   #else/#endif lines are missing from this extraction.  */
184 team
= team_malloc (sizeof (*team
) + nthreads
* extra
);
187 #ifndef HAVE_SYNC_BUILTINS
/* Without atomic builtins the free-list needs a real mutex.  */
188 gomp_mutex_init (&team
->work_share_list_free_lock
);
190 gomp_barrier_init (&team
->barrier
, nthreads
);
191 gomp_mutex_init (&team
->task_lock
);
193 team
->nthreads
= nthreads
;
/* Initial inline work-share array holds 8 entries; entry 0 is live, the
   remaining 7 are chained onto the allocation free list below.  */
196 team
->work_share_chunk
= 8;
197 #ifdef HAVE_SYNC_BUILTINS
198 team
->single_count
= 0;
200 team
->work_shares_to_free
= &team
->work_shares
[0];
201 gomp_init_work_share (&team
->work_shares
[0], 0, nthreads
);
202 team
->work_shares
[0].next_alloc
= NULL
;
203 team
->work_share_list_free
= NULL
;
204 team
->work_share_list_alloc
= &team
->work_shares
[1];
205 for (i
= 1; i
< 7; i
++)
206 team
->work_shares
[i
].next_free
= &team
->work_shares
[i
+ 1];
207 team
->work_shares
[i
].next_free
= NULL
;
/* Master's release semaphore doubles as ordered_release[0]; the rest of
   the table lives just past the implicit_task array in the same block.  */
209 gomp_sem_init (&team
->master_release
, 0);
210 team
->ordered_release
= (void *) &team
->implicit_task
[nthreads
];
211 team
->ordered_release
[0] = &team
->master_release
;
213 priority_queue_init (&team
->task_queue
);
214 team
->task_count
= 0;
215 team
->task_queued_count
= 0;
216 team
->task_running_count
= 0;
217 team
->work_share_cancelled
= 0;
218 team
->team_cancelled
= 0;
220 team
->task_detach_count
= 0;
226 /* Free a team data structure. */
/* Destroys the synchronization objects created in gomp_new_team and releases
   the team's memory.  NOTE(review): the return type line and the final
   team_free call are missing from this extraction.  */
229 free_team (struct gomp_team
*team
)
231 #ifndef HAVE_SYNC_BUILTINS
/* Mirror of the conditional gomp_mutex_init in gomp_new_team.  */
232 gomp_mutex_destroy (&team
->work_share_list_free_lock
);
234 gomp_barrier_destroy (&team
->barrier
);
235 gomp_mutex_destroy (&team
->task_lock
);
236 priority_queue_free (&team
->task_queue
);
/* Worker-side helper installed as nthr->fn by gomp_free_thread: each pool
   thread runs this once, signals the last-waiter barrier, tears down its
   own state and terminates itself.  NOTE(review): the introductory comment
   and return type line are missing from this extraction.  */
241 gomp_free_pool_helper (void *thread_pool
)
243 struct gomp_thread
*thr
= gomp_thread ();
244 struct gomp_thread_pool
*pool
245 = (struct gomp_thread_pool
*) thread_pool
;
/* Tell gomp_free_thread we have undocked; after this the pool may be
   destroyed, so touch nothing in it below.  */
246 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
247 gomp_sem_destroy (&thr
->release
);
248 thr
->thread_pool
= NULL
;
/* Per-target thread termination: detach+exit under pthreads, target
   specific exit sequences otherwise.  */
250 #ifdef LIBGOMP_USE_PTHREADS
251 pthread_detach (pthread_self ());
253 #elif defined(__nvptx__)
255 #elif defined(__AMDGCN__)
/* Flush the data cache before the wavefront terminates.  */
256 asm ("s_dcache_wb\n\t"
259 #error gomp_free_pool_helper must terminate the thread
263 /* Free a thread pool and release its threads. */
/* Runs as the gomp_thread_destructor key destructor when a thread exits:
   wakes every docked pool thread with gomp_free_pool_helper as its fn,
   waits for them to leave, destroys the dock barrier, frees the cached
   team and the pool itself, and finishes any dangling implicit task.
   NOTE(review): numerous body lines (braces, nthr->data assignment, the
   free_team guard, pool free, task teardown) are missing from this
   extraction.  */
266 gomp_free_thread (void *arg
__attribute__((unused
)))
268 struct gomp_thread
*thr
= gomp_thread ();
269 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
272 if (pool
->threads_used
> 0)
/* Point every docked worker (ids 1..used-1) at the shutdown helper.  */
275 for (i
= 1; i
< pool
->threads_used
; i
++)
277 struct gomp_thread
*nthr
= pool
->threads
[i
];
278 nthr
->fn
= gomp_free_pool_helper
;
281 /* This barrier undocks threads docked on pool->threads_dock. */
282 gomp_simple_barrier_wait (&pool
->threads_dock
);
283 /* And this waits till all threads have called gomp_barrier_wait_last
284 in gomp_free_pool_helper. */
285 gomp_simple_barrier_wait (&pool
->threads_dock
);
286 /* Now it is safe to destroy the barrier and free the pool. */
287 gomp_simple_barrier_destroy (&pool
->threads_dock
);
/* Account for the (threads_used - 1) workers that just exited.  */
289 #ifdef HAVE_SYNC_BUILTINS
290 __sync_fetch_and_add (&gomp_managed_threads
,
291 1L - pool
->threads_used
);
293 gomp_mutex_lock (&gomp_managed_threads_lock
);
294 gomp_managed_threads
-= pool
->threads_used
- 1L;
295 gomp_mutex_unlock (&gomp_managed_threads_lock
);
299 free_team (pool
->last_team
);
301 team_free (pool
->threads
);
304 thr
->thread_pool
= NULL
;
/* Outermost thread still inside a team at exit: finish its implicit task
   so task resources are not leaked.  */
306 if (thr
->ts
.level
== 0 && __builtin_expect (thr
->ts
.team
!= NULL
, 0))
308 if (thr
->task
!= NULL
)
310 struct gomp_task
*task
= thr
->task
;
318 #ifdef LIBGOMP_USE_PTHREADS
/* Launch a team of NTHREADS threads to execute FN(DATA): reuse docked pool
   threads where possible (non-nested only), create the rest with
   pthread_create, applying OMP_PLACES/proc-bind affinity partitioning.
   TEAM was produced by gomp_new_team; TASKGROUP is propagated into every
   implicit task.  NOTE(review): this extraction is missing many lines of
   the original function (switch headers, braces, else arms, several
   assignments); the fragments below preserve original order only.  */
320 gomp_team_start (void (*fn
) (void *), void *data
, unsigned nthreads
,
321 unsigned flags
, struct gomp_team
*team
,
322 struct gomp_taskgroup
*taskgroup
)
324 struct gomp_thread_start_data
*start_data
= NULL
;
325 struct gomp_thread
*thr
, *nthr
;
326 struct gomp_task
*task
;
327 struct gomp_task_icv
*icv
;
329 struct gomp_thread_pool
*pool
;
330 unsigned i
, n
, old_threads_used
= 0;
331 pthread_attr_t thread_attr
, *attr
;
332 unsigned long nthreads_var
;
/* Affinity bookkeeping: s/rest/p/k drive the place-partition computation
   per proc-bind policy; affinity_thr holds displaced pool threads.  */
334 unsigned int s
= 0, rest
= 0, p
= 0, k
= 0;
335 unsigned int affinity_count
= 0;
336 struct gomp_thread
**affinity_thr
= NULL
;
337 bool force_display
= false;
339 thr
= gomp_thread ();
340 nested
= thr
->ts
.level
;
341 pool
= thr
->thread_pool
;
343 icv
= task
? &task
->icv
: &gomp_global_icv
;
/* Lazily bind the master thread when places are configured.  */
344 if (__builtin_expect (gomp_places_list
!= NULL
, 0) && thr
->place
== 0)
346 gomp_init_affinity ();
347 if (__builtin_expect (gomp_display_affinity_var
, 0) && nthreads
== 1)
348 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
352 /* Always save the previous state, even if this isn't a nested team.
353 In particular, we should save any work share state from an outer
354 orphaned work share construct. */
355 team
->prev_ts
= thr
->ts
;
/* Install the master thread (team_id 0) into the new team's state.  */
361 ++thr
->ts
.active_level
;
362 thr
->ts
.work_share
= &team
->work_shares
[0];
363 thr
->ts
.last_work_share
= NULL
;
364 #ifdef HAVE_SYNC_BUILTINS
365 thr
->ts
.single_count
= 0;
367 thr
->ts
.static_trip
= 0;
368 thr
->task
= &team
->implicit_task
[0];
369 #ifdef GOMP_NEEDS_THREAD_HANDLE
370 thr
->handle
= pthread_self ();
/* Resolve nthreads-var and bind-var ICVs, honoring OMP_NUM_THREADS /
   OMP_PROC_BIND per-level lists and the proc_bind clause (flags & 7).  */
372 nthreads_var
= icv
->nthreads_var
;
373 if (__builtin_expect (gomp_nthreads_var_list
!= NULL
, 0)
374 && thr
->ts
.level
< gomp_nthreads_var_list_len
)
375 nthreads_var
= gomp_nthreads_var_list
[thr
->ts
.level
];
376 bind_var
= icv
->bind_var
;
377 if (bind_var
!= omp_proc_bind_false
&& (flags
& 7) != omp_proc_bind_false
)
378 bind_var
= flags
& 7;
380 if (__builtin_expect (gomp_bind_var_list
!= NULL
, 0)
381 && thr
->ts
.level
< gomp_bind_var_list_len
)
382 bind_var
= gomp_bind_var_list
[thr
->ts
.level
];
383 gomp_init_task (thr
->task
, task
, icv
);
384 thr
->task
->taskgroup
= taskgroup
;
385 team
->implicit_task
[0].icv
.nthreads_var
= nthreads_var
;
386 team
->implicit_task
[0].icv
.bind_var
= bind_var
;
393 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
395 /* Depending on chosen proc_bind model, set subpartition
396 for the master thread and initialize helper variables
397 P and optionally S, K and/or REST used by later place
398 computation for each additional thread. */
402 case omp_proc_bind_true
:
403 case omp_proc_bind_close
:
404 if (nthreads
> thr
->ts
.place_partition_len
)
406 /* T > P. S threads will be placed in each place,
407 and the final REM threads placed one by one
408 into the already occupied places. */
409 s
= nthreads
/ thr
->ts
.place_partition_len
;
410 rest
= nthreads
% thr
->ts
.place_partition_len
;
416 case omp_proc_bind_master
:
417 /* Each thread will be bound to master's place. */
419 case omp_proc_bind_spread
:
420 if (nthreads
<= thr
->ts
.place_partition_len
)
422 /* T <= P. Each subpartition will have in between s
423 and s+1 places (subpartitions starting at or
424 after rest will have s places, earlier s+1 places),
425 each thread will be bound to the first place in
426 its subpartition (except for the master thread
427 that can be bound to another place in its
429 s
= thr
->ts
.place_partition_len
/ nthreads
;
430 rest
= thr
->ts
.place_partition_len
% nthreads
;
431 rest
= (s
+ 1) * rest
+ thr
->ts
.place_partition_off
;
434 p
-= (p
- thr
->ts
.place_partition_off
) % (s
+ 1);
435 thr
->ts
.place_partition_len
= s
+ 1;
440 thr
->ts
.place_partition_len
= s
;
442 thr
->ts
.place_partition_off
= p
;
446 /* T > P. Each subpartition will have just a single
447 place and we'll place between s and s+1
448 threads into each subpartition. */
449 s
= nthreads
/ thr
->ts
.place_partition_len
;
450 rest
= nthreads
% thr
->ts
.place_partition_len
;
451 thr
->ts
.place_partition_off
= p
;
452 thr
->ts
.place_partition_len
= 1;
/* No places configured: affinity machinery disabled below.  */
459 bind
= omp_proc_bind_false
;
461 /* We only allow the reuse of idle threads for non-nested PARALLEL
462 regions. This appears to be implied by the semantics of
463 threadprivate variables, but perhaps that's reading too much into
464 things. Certainly it does prevent any locking problems, since
465 only the initial program thread will modify gomp_threads. */
468 old_threads_used
= pool
->threads_used
;
470 if (nthreads
<= old_threads_used
)
472 else if (old_threads_used
== 0)
475 gomp_simple_barrier_init (&pool
->threads_dock
, nthreads
);
479 n
= old_threads_used
;
481 /* Increase the barrier threshold to make sure all new
482 threads arrive before the team is released. */
483 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
486 /* Not true yet, but soon will be. We're going to release all
487 threads from the dock, and those that aren't part of the
489 pool
->threads_used
= nthreads
;
491 /* If necessary, expand the size of the gomp_threads array. It is
492 expected that changes in the number of threads are rare, thus we
493 make no effort to expand gomp_threads_size geometrically. */
494 if (nthreads
>= pool
->threads_size
)
496 pool
->threads_size
= nthreads
+ 1;
498 = gomp_realloc (pool
->threads
,
500 * sizeof (struct gomp_thread
*));
501 /* Add current (master) thread to threads[]. */
502 pool
->threads
[0] = thr
;
505 /* Release existing idle threads. */
/* For each reused pool thread, recompute its place subpartition and verify
   its existing binding matches; mismatches go through affinity_thr
   permutation below.  */
508 unsigned int place_partition_off
= thr
->ts
.place_partition_off
;
509 unsigned int place_partition_len
= thr
->ts
.place_partition_len
;
510 unsigned int place
= 0;
511 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
515 case omp_proc_bind_true
:
516 case omp_proc_bind_close
:
520 if (p
== (team
->prev_ts
.place_partition_off
521 + team
->prev_ts
.place_partition_len
))
522 p
= team
->prev_ts
.place_partition_off
;
524 if (i
== nthreads
- rest
)
530 case omp_proc_bind_master
:
532 case omp_proc_bind_spread
:
540 if (p
== (team
->prev_ts
.place_partition_off
541 + team
->prev_ts
.place_partition_len
))
542 p
= team
->prev_ts
.place_partition_off
;
543 place_partition_off
= p
;
545 place_partition_len
= s
+ 1;
547 place_partition_len
= s
;
555 if (p
== (team
->prev_ts
.place_partition_off
556 + team
->prev_ts
.place_partition_len
))
557 p
= team
->prev_ts
.place_partition_off
;
559 if (i
== nthreads
- rest
)
564 place_partition_off
= p
;
565 place_partition_len
= 1;
/* Reused thread bound to the wrong place: displace it and search the
   affinity_thr table for a correctly-placed one instead.  */
569 if (affinity_thr
!= NULL
570 || (bind
!= omp_proc_bind_true
571 && pool
->threads
[i
]->place
!= p
+ 1)
572 || pool
->threads
[i
]->place
<= place_partition_off
573 || pool
->threads
[i
]->place
> (place_partition_off
574 + place_partition_len
))
577 force_display
= true;
578 if (affinity_thr
== NULL
)
/* First mismatch: build a per-place index of the remaining old
   threads (heap for large partitions, alloca otherwise).  */
582 if (team
->prev_ts
.place_partition_len
> 64)
584 = gomp_malloc (team
->prev_ts
.place_partition_len
585 * sizeof (struct gomp_thread
*));
588 = gomp_alloca (team
->prev_ts
.place_partition_len
589 * sizeof (struct gomp_thread
*));
590 memset (affinity_thr
, '\0',
591 team
->prev_ts
.place_partition_len
592 * sizeof (struct gomp_thread
*));
593 for (j
= i
; j
< old_threads_used
; j
++)
595 if (pool
->threads
[j
]->place
596 > team
->prev_ts
.place_partition_off
597 && (pool
->threads
[j
]->place
598 <= (team
->prev_ts
.place_partition_off
599 + team
->prev_ts
.place_partition_len
)))
/* Chain threads sharing a place through their data field.  */
601 l
= pool
->threads
[j
]->place
- 1
602 - team
->prev_ts
.place_partition_off
;
603 pool
->threads
[j
]->data
= affinity_thr
[l
];
604 affinity_thr
[l
] = pool
->threads
[j
];
606 pool
->threads
[j
] = NULL
;
608 if (nthreads
> old_threads_used
)
609 memset (&pool
->threads
[old_threads_used
],
610 '\0', ((nthreads
- old_threads_used
)
611 * sizeof (struct gomp_thread
*)));
613 affinity_count
= old_threads_used
- i
;
615 if (affinity_count
== 0)
618 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
621 if (bind
!= omp_proc_bind_true
)
623 for (l
= place_partition_off
;
624 l
< place_partition_off
+ place_partition_len
;
626 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
629 if (l
== place_partition_off
+ place_partition_len
)
/* Found a correctly-placed displaced thread: pop it off its
   place chain and install it at slot i.  */
632 nthr
= affinity_thr
[l
- team
->prev_ts
.place_partition_off
];
633 affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
634 = (struct gomp_thread
*) nthr
->data
;
636 pool
->threads
[i
] = nthr
;
639 nthr
= pool
->threads
[i
];
643 nthr
= pool
->threads
[i
];
/* Initialize the reused thread's team state and implicit task, mirroring
   what gomp_thread_start does for freshly created threads.  */
644 nthr
->ts
.team
= team
;
645 nthr
->ts
.work_share
= &team
->work_shares
[0];
646 nthr
->ts
.last_work_share
= NULL
;
647 nthr
->ts
.team_id
= i
;
648 nthr
->ts
.level
= team
->prev_ts
.level
+ 1;
649 nthr
->ts
.active_level
= thr
->ts
.active_level
;
650 nthr
->ts
.place_partition_off
= place_partition_off
;
651 nthr
->ts
.place_partition_len
= place_partition_len
;
652 nthr
->ts
.def_allocator
= thr
->ts
.def_allocator
;
653 #ifdef HAVE_SYNC_BUILTINS
654 nthr
->ts
.single_count
= 0;
656 nthr
->ts
.static_trip
= 0;
657 nthr
->num_teams
= thr
->num_teams
;
658 nthr
->team_num
= thr
->team_num
;
659 nthr
->task
= &team
->implicit_task
[i
];
661 gomp_init_task (nthr
->task
, task
, icv
);
662 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
663 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
664 nthr
->task
->taskgroup
= taskgroup
;
667 team
->ordered_release
[i
] = &nthr
->release
;
670 if (__builtin_expect (affinity_thr
!= NULL
, 0))
672 /* If AFFINITY_THR is non-NULL just because we had to
673 permute some threads in the pool, but we've managed
674 to find exactly as many old threads as we'd find
675 without affinity, we don't need to handle this
676 specially anymore. */
677 if (nthreads
<= old_threads_used
678 ? (affinity_count
== old_threads_used
- nthreads
)
679 : (i
== old_threads_used
))
681 if (team
->prev_ts
.place_partition_len
> 64)
689 /* We are going to compute the places/subpartitions
690 again from the beginning. So, we need to reinitialize
691 vars modified by the switch (bind) above inside
692 of the loop, to the state they had after the initial
696 case omp_proc_bind_true
:
697 case omp_proc_bind_close
:
698 if (nthreads
> thr
->ts
.place_partition_len
)
699 /* T > P. S has been changed, so needs
701 s
= nthreads
/ thr
->ts
.place_partition_len
;
705 case omp_proc_bind_master
:
706 /* No vars have been changed. */
708 case omp_proc_bind_spread
:
709 p
= thr
->ts
.place_partition_off
;
713 s
= nthreads
/ team
->prev_ts
.place_partition_len
;
719 /* Increase the barrier threshold to make sure all new
720 threads and all the threads we're going to let die
721 arrive before the team is released. */
723 gomp_simple_barrier_reinit (&pool
->threads_dock
,
724 nthreads
+ affinity_count
);
/* Account for threads about to be created (plus displaced threads that
   will exit) in the global managed-thread count.  */
733 if (__builtin_expect (nthreads
+ affinity_count
> old_threads_used
, 0))
735 long diff
= (long) (nthreads
+ affinity_count
) - (long) old_threads_used
;
737 if (old_threads_used
== 0)
740 #ifdef HAVE_SYNC_BUILTINS
741 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
743 gomp_mutex_lock (&gomp_managed_threads_lock
);
744 gomp_managed_threads
+= diff
;
745 gomp_mutex_unlock (&gomp_managed_threads_lock
);
/* With places active, clone the global attr so per-thread affinity can be
   set; copy over the configured stack size.  */
749 attr
= &gomp_thread_attr
;
750 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
753 pthread_attr_init (&thread_attr
);
754 if (! pthread_attr_getstacksize (&gomp_thread_attr
, &stacksize
))
755 pthread_attr_setstacksize (&thread_attr
, stacksize
);
760 __builtin_unreachable ();
/* Stack-allocate one start_data per thread still to be created; each
   slot must outlive pthread_create until gomp_thread_start copies it.  */
761 start_data
= gomp_alloca (sizeof (struct gomp_thread_start_data
)
764 /* Launch new threads. */
765 for (; i
< nthreads
; ++i
)
769 start_data
->ts
.place_partition_off
= thr
->ts
.place_partition_off
;
770 start_data
->ts
.place_partition_len
= thr
->ts
.place_partition_len
;
771 start_data
->place
= 0;
/* Same per-thread place computation as in the reuse loop above, but
   recording the result into start_data instead of an existing thread.  */
772 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
776 case omp_proc_bind_true
:
777 case omp_proc_bind_close
:
781 if (p
== (team
->prev_ts
.place_partition_off
782 + team
->prev_ts
.place_partition_len
))
783 p
= team
->prev_ts
.place_partition_off
;
785 if (i
== nthreads
- rest
)
791 case omp_proc_bind_master
:
793 case omp_proc_bind_spread
:
801 if (p
== (team
->prev_ts
.place_partition_off
802 + team
->prev_ts
.place_partition_len
))
803 p
= team
->prev_ts
.place_partition_off
;
804 start_data
->ts
.place_partition_off
= p
;
806 start_data
->ts
.place_partition_len
= s
+ 1;
808 start_data
->ts
.place_partition_len
= s
;
816 if (p
== (team
->prev_ts
.place_partition_off
817 + team
->prev_ts
.place_partition_len
))
818 p
= team
->prev_ts
.place_partition_off
;
820 if (i
== nthreads
- rest
)
825 start_data
->ts
.place_partition_off
= p
;
826 start_data
->ts
.place_partition_len
= 1;
830 start_data
->place
= p
+ 1;
831 if (affinity_thr
!= NULL
&& pool
->threads
[i
] != NULL
)
833 gomp_init_thread_affinity (attr
, p
);
/* Fill in the remainder of start_data for gomp_thread_start.  */
837 start_data
->fn_data
= data
;
838 start_data
->ts
.team
= team
;
839 start_data
->ts
.work_share
= &team
->work_shares
[0];
840 start_data
->ts
.last_work_share
= NULL
;
841 start_data
->ts
.team_id
= i
;
842 start_data
->ts
.level
= team
->prev_ts
.level
+ 1;
843 start_data
->ts
.active_level
= thr
->ts
.active_level
;
844 start_data
->ts
.def_allocator
= thr
->ts
.def_allocator
;
845 #ifdef HAVE_SYNC_BUILTINS
846 start_data
->ts
.single_count
= 0;
848 start_data
->ts
.static_trip
= 0;
849 start_data
->num_teams
= thr
->num_teams
;
850 start_data
->team_num
= thr
->team_num
;
851 start_data
->task
= &team
->implicit_task
[i
];
852 gomp_init_task (start_data
->task
, task
, icv
);
853 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
854 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
855 start_data
->task
->taskgroup
= taskgroup
;
856 start_data
->thread_pool
= pool
;
857 start_data
->nested
= nested
;
859 attr
= gomp_adjust_thread_attr (attr
, &thread_attr
);
860 err
= pthread_create (&start_data
->handle
, attr
, gomp_thread_start
,
864 gomp_fatal ("Thread creation failed: %s", strerror (err
));
867 if (__builtin_expect (attr
== &thread_attr
, 0))
868 pthread_attr_destroy (&thread_attr
);
/* Release the team: nested teams use the team barrier, non-nested teams
   release the docked pool threads.  */
872 gomp_barrier_wait (&team
->barrier
);
874 gomp_simple_barrier_wait (&pool
->threads_dock
);
876 /* Decrease the barrier threshold to match the number of threads
877 that should arrive back at the end of this team. The extra
878 threads should be exiting. Note that we arrange for this test
879 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
880 the barrier as well as gomp_managed_threads was temporarily
881 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
882 AFFINITY_COUNT if non-zero will be always at least
883 OLD_THREADS_COUNT - NTHREADS. */
884 if (__builtin_expect (nthreads
< old_threads_used
, 0)
885 || __builtin_expect (affinity_count
, 0))
887 long diff
= (long) nthreads
- (long) old_threads_used
;
890 diff
= -affinity_count
;
892 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
894 #ifdef HAVE_SYNC_BUILTINS
895 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
897 gomp_mutex_lock (&gomp_managed_threads_lock
);
898 gomp_managed_threads
+= diff
;
899 gomp_mutex_unlock (&gomp_managed_threads_lock
);
/* OMP_DISPLAY_AFFINITY support: report each team member's binding when
   the team shape or placement changed.  */
902 if (__builtin_expect (gomp_display_affinity_var
, 0))
905 || nthreads
!= old_threads_used
908 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
/* Rewind start_data to the first launched thread's slot.  */
912 start_data
-= nthreads
- 1;
913 for (i
= 1; i
< nthreads
; ++i
)
915 gomp_display_affinity_thread (
916 #ifdef LIBGOMP_USE_PTHREADS
928 for (i
= 1; i
< nthreads
; ++i
)
930 gomp_thread_handle handle
931 = gomp_thread_to_pthread_t (pool
->threads
[i
]);
932 gomp_display_affinity_thread (handle
, &pool
->threads
[i
]->ts
,
933 pool
->threads
[i
]->place
);
/* Free the heap copy of affinity_thr (alloca copies need no free).  */
938 if (__builtin_expect (affinity_thr
!= NULL
, 0)
939 && team
->prev_ts
.place_partition_len
> 64)
945 /* Terminate the current team. This is only to be called by the master
946 thread. We assume that we must wait for the other threads. */
/* NOTE(review): the function signature line and several body lines (loop
   headers, braces, team_free of the work-share chain) are missing from this
   extraction.  */
951 struct gomp_thread
*thr
= gomp_thread ();
952 struct gomp_team
*team
= thr
->ts
.team
;
954 /* This barrier handles all pending explicit threads.
955 As #pragma omp cancel parallel might get awaited count in
956 team->barrier in a inconsistent state, we need to use a different
958 gomp_team_barrier_wait_final (&team
->barrier
);
959 if (__builtin_expect (team
->team_cancelled
, 0))
/* Cancelled team: walk the pending work-share chain and release each
   entry, since the normal per-construct cleanup was skipped.  */
961 struct gomp_work_share
*ws
= team
->work_shares_to_free
;
964 struct gomp_work_share
*next_ws
= gomp_ptrlock_get (&ws
->next_ws
);
966 gomp_ptrlock_set (&ws
->next_ws
, ws
);
967 gomp_fini_work_share (ws
);
973 gomp_fini_work_share (thr
->ts
.work_share
);
/* Restore the master thread's pre-team state saved in gomp_team_start.  */
976 thr
->ts
= team
->prev_ts
;
978 if (__builtin_expect (thr
->ts
.level
!= 0, 0))
/* Nested team: its (nthreads - 1) extra threads are exiting now.  */
980 #ifdef HAVE_SYNC_BUILTINS
981 __sync_fetch_and_add (&gomp_managed_threads
, 1L - team
->nthreads
);
983 gomp_mutex_lock (&gomp_managed_threads_lock
);
984 gomp_managed_threads
-= team
->nthreads
- 1L;
985 gomp_mutex_unlock (&gomp_managed_threads_lock
);
987 /* This barrier has gomp_barrier_wait_last counterparts
988 and ensures the team can be safely destroyed. */
989 gomp_barrier_wait (&team
->barrier
);
/* Free any dynamically allocated work-share chunks beyond the inline 8.  */
992 if (__builtin_expect (team
->work_shares
[0].next_alloc
!= NULL
, 0))
994 struct gomp_work_share
*ws
= team
->work_shares
[0].next_alloc
;
997 struct gomp_work_share
*next_ws
= ws
->next_alloc
;
1003 gomp_sem_destroy (&team
->master_release
);
/* Nested or single-thread team: free immediately; otherwise cache the
   team in the pool (replacing and freeing any previously cached one).  */
1005 if (__builtin_expect (thr
->ts
.team
!= NULL
, 0)
1006 || __builtin_expect (team
->nthreads
== 1, 0))
1010 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
1011 if (pool
->last_team
)
1012 free_team (pool
->last_team
);
1013 pool
->last_team
= team
;
1014 gomp_release_thread_pool (pool
);
1018 #ifdef LIBGOMP_USE_PTHREADS
1020 /* Constructors for this file. */
/* Runs at library load: sets up the non-TLS fallback key for the initial
   thread's gomp_thread data and registers gomp_free_thread as the
   per-thread destructor used to tear down thread pools on thread exit.  */
1022 static void __attribute__((constructor
))
1023 initialize_team (void)
1025 #if !defined HAVE_TLS && !defined USE_EMUTLS
/* Static storage stands in for TLS for the initial thread.  */
1026 static struct gomp_thread initial_thread_tls_data
;
1028 pthread_key_create (&gomp_tls_key
, NULL
);
1029 pthread_setspecific (gomp_tls_key
, &initial_thread_tls_data
);
1032 if (pthread_key_create (&gomp_thread_destructor
, gomp_free_thread
) != 0)
1033 gomp_fatal ("could not create thread pool destructor.");
/* Runs at library unload: removes the destructor key so a later dlclose of
   libgomp cannot leave a dangling key destructor behind.  */
1036 static void __attribute__((destructor
))
1037 team_destructor (void)
1039 /* Without this dlclose on libgomp could lead to subsequent
1041 pthread_key_delete (gomp_thread_destructor
);
1044 /* Similar to gomp_free_pool_helper, but don't detach itself,
1045 gomp_pause_host will pthread_join those threads. */
/* NOTE(review): the return type line and closing brace are missing from
   this extraction.  */
1048 gomp_pause_pool_helper (void *thread_pool
)
1050 struct gomp_thread
*thr
= gomp_thread ();
1051 struct gomp_thread_pool
*pool
1052 = (struct gomp_thread_pool
*) thread_pool
;
/* Signal gomp_pause_host; the pool must not be touched after this.  */
1053 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
1054 gomp_sem_destroy (&thr
->release
);
1055 thr
->thread_pool
= NULL
;
/* Exit without detaching so gomp_pause_host can pthread_join us.  */
1057 pthread_exit (NULL
);
1060 /* Free a thread pool and release its threads. Return non-zero on
/* omp_pause_resource support: like gomp_free_thread but joinable — collects
   each worker's pthread_t, wakes them with gomp_pause_pool_helper, joins
   them all, then frees the cached team and the pool.  NOTE(review): the
   signature's comment tail, the early-return when inside a team, the thrs
   declaration line and the final return are missing from this extraction.  */
1064 gomp_pause_host (void)
1066 struct gomp_thread
*thr
= gomp_thread ();
1067 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
1072 if (pool
->threads_used
> 0)
/* Stack array of handles to join after the workers undock.  */
1076 = gomp_alloca (sizeof (pthread_t
) * pool
->threads_used
);
1077 for (i
= 1; i
< pool
->threads_used
; i
++)
1079 struct gomp_thread
*nthr
= pool
->threads
[i
];
1080 nthr
->fn
= gomp_pause_pool_helper
;
1082 thrs
[i
] = gomp_thread_to_pthread_t (nthr
);
1084 /* This barrier undocks threads docked on pool->threads_dock. */
1085 gomp_simple_barrier_wait (&pool
->threads_dock
);
1086 /* And this waits till all threads have called gomp_barrier_wait_last
1087 in gomp_pause_pool_helper. */
1088 gomp_simple_barrier_wait (&pool
->threads_dock
);
1089 /* Now it is safe to destroy the barrier and free the pool. */
1090 gomp_simple_barrier_destroy (&pool
->threads_dock
);
/* Account for the exiting workers in the global thread count.  */
1092 #ifdef HAVE_SYNC_BUILTINS
1093 __sync_fetch_and_add (&gomp_managed_threads
,
1094 1L - pool
->threads_used
);
1096 gomp_mutex_lock (&gomp_managed_threads_lock
);
1097 gomp_managed_threads
-= pool
->threads_used
- 1L;
1098 gomp_mutex_unlock (&gomp_managed_threads_lock
);
/* Unlike gomp_free_thread, reap every worker before freeing the pool.  */
1100 for (i
= 1; i
< pool
->threads_used
; i
++)
1101 pthread_join (thrs
[i
], NULL
);
1103 if (pool
->last_team
)
1104 free_team (pool
->last_team
);
1106 team_free (pool
->threads
);
1109 thr
->thread_pool
= NULL
;
1115 struct gomp_task_icv
*
1118 struct gomp_thread
*thr
= gomp_thread ();
1119 struct gomp_task
*task
= gomp_malloc (sizeof (struct gomp_task
));
1120 gomp_init_task (task
, NULL
, &gomp_global_icv
);
1122 #ifdef LIBGOMP_USE_PTHREADS
1123 pthread_setspecific (gomp_thread_destructor
, thr
);