/* Copyright (C) 2005-2021 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */
34 #ifdef LIBGOMP_USE_PTHREADS
35 pthread_attr_t gomp_thread_attr
;
37 /* This key is for the thread destructor. */
38 pthread_key_t gomp_thread_destructor
;
41 /* This is the libgomp per-thread data structure. */
42 #if defined HAVE_TLS || defined USE_EMUTLS
43 __thread
struct gomp_thread gomp_tls_data
;
45 pthread_key_t gomp_tls_key
;
49 /* This structure is used to communicate across pthread_create. */
51 struct gomp_thread_start_data
55 struct gomp_team_state ts
;
56 struct gomp_task
*task
;
57 struct gomp_thread_pool
*thread_pool
;
59 unsigned int num_teams
;
60 unsigned int team_num
;
66 /* This function is a pthread_create entry point. This contains the idle
67 loop in which a thread waits to be called up to become part of a team. */
70 gomp_thread_start (void *xdata
)
72 struct gomp_thread_start_data
*data
= xdata
;
73 struct gomp_thread
*thr
;
74 struct gomp_thread_pool
*pool
;
75 void (*local_fn
) (void *);
78 #if defined HAVE_TLS || defined USE_EMUTLS
81 struct gomp_thread local_thr
;
84 gomp_sem_init (&thr
->release
, 0);
86 /* Extract what we need from data. */
88 local_data
= data
->fn_data
;
89 thr
->thread_pool
= data
->thread_pool
;
91 thr
->task
= data
->task
;
92 thr
->place
= data
->place
;
93 thr
->num_teams
= data
->num_teams
;
94 thr
->team_num
= data
->team_num
;
95 #ifdef GOMP_NEEDS_THREAD_HANDLE
96 thr
->handle
= data
->handle
;
98 #if !(defined HAVE_TLS || defined USE_EMUTLS)
99 pthread_setspecific (gomp_tls_key
, thr
);
102 thr
->ts
.team
->ordered_release
[thr
->ts
.team_id
] = &thr
->release
;
104 /* Make thread pool local. */
105 pool
= thr
->thread_pool
;
109 struct gomp_team
*team
= thr
->ts
.team
;
110 struct gomp_task
*task
= thr
->task
;
112 gomp_barrier_wait (&team
->barrier
);
114 local_fn (local_data
);
115 gomp_team_barrier_wait_final (&team
->barrier
);
116 gomp_finish_task (task
);
117 gomp_barrier_wait_last (&team
->barrier
);
121 pool
->threads
[thr
->ts
.team_id
] = thr
;
123 gomp_simple_barrier_wait (&pool
->threads_dock
);
126 struct gomp_team
*team
= thr
->ts
.team
;
127 struct gomp_task
*task
= thr
->task
;
129 local_fn (local_data
);
130 gomp_team_barrier_wait_final (&team
->barrier
);
131 gomp_finish_task (task
);
133 gomp_simple_barrier_wait (&pool
->threads_dock
);
136 local_data
= thr
->data
;
142 gomp_sem_destroy (&thr
->release
);
143 pthread_detach (pthread_self ());
144 thr
->thread_pool
= NULL
;
150 static inline struct gomp_team
*
151 get_last_team (unsigned nthreads
)
153 struct gomp_thread
*thr
= gomp_thread ();
154 if (thr
->ts
.team
== NULL
)
156 struct gomp_thread_pool
*pool
= gomp_get_thread_pool (thr
, nthreads
);
157 struct gomp_team
*last_team
= pool
->last_team
;
158 if (last_team
!= NULL
&& last_team
->nthreads
== nthreads
)
160 pool
->last_team
= NULL
;
167 /* Create a new team data structure. */
170 gomp_new_team (unsigned nthreads
)
172 struct gomp_team
*team
;
175 team
= get_last_team (nthreads
);
178 size_t extra
= sizeof (team
->ordered_release
[0])
179 + sizeof (team
->implicit_task
[0]);
180 team
= team_malloc (sizeof (*team
) + nthreads
* extra
);
182 #ifndef HAVE_SYNC_BUILTINS
183 gomp_mutex_init (&team
->work_share_list_free_lock
);
185 gomp_barrier_init (&team
->barrier
, nthreads
);
186 gomp_mutex_init (&team
->task_lock
);
188 team
->nthreads
= nthreads
;
191 team
->work_share_chunk
= 8;
192 #ifdef HAVE_SYNC_BUILTINS
193 team
->single_count
= 0;
195 team
->work_shares_to_free
= &team
->work_shares
[0];
196 gomp_init_work_share (&team
->work_shares
[0], 0, nthreads
);
197 team
->work_shares
[0].next_alloc
= NULL
;
198 team
->work_share_list_free
= NULL
;
199 team
->work_share_list_alloc
= &team
->work_shares
[1];
200 for (i
= 1; i
< 7; i
++)
201 team
->work_shares
[i
].next_free
= &team
->work_shares
[i
+ 1];
202 team
->work_shares
[i
].next_free
= NULL
;
204 gomp_sem_init (&team
->master_release
, 0);
205 team
->ordered_release
= (void *) &team
->implicit_task
[nthreads
];
206 team
->ordered_release
[0] = &team
->master_release
;
208 priority_queue_init (&team
->task_queue
);
209 team
->task_count
= 0;
210 team
->task_queued_count
= 0;
211 team
->task_running_count
= 0;
212 team
->work_share_cancelled
= 0;
213 team
->team_cancelled
= 0;
215 team
->task_detach_count
= 0;
221 /* Free a team data structure. */
224 free_team (struct gomp_team
*team
)
226 #ifndef HAVE_SYNC_BUILTINS
227 gomp_mutex_destroy (&team
->work_share_list_free_lock
);
229 gomp_barrier_destroy (&team
->barrier
);
230 gomp_mutex_destroy (&team
->task_lock
);
231 priority_queue_free (&team
->task_queue
);
236 gomp_free_pool_helper (void *thread_pool
)
238 struct gomp_thread
*thr
= gomp_thread ();
239 struct gomp_thread_pool
*pool
240 = (struct gomp_thread_pool
*) thread_pool
;
241 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
242 gomp_sem_destroy (&thr
->release
);
243 thr
->thread_pool
= NULL
;
245 #ifdef LIBGOMP_USE_PTHREADS
246 pthread_detach (pthread_self ());
248 #elif defined(__nvptx__)
250 #elif defined(__AMDGCN__)
251 asm ("s_dcache_wb\n\t"
254 #error gomp_free_pool_helper must terminate the thread
258 /* Free a thread pool and release its threads. */
261 gomp_free_thread (void *arg
__attribute__((unused
)))
263 struct gomp_thread
*thr
= gomp_thread ();
264 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
267 if (pool
->threads_used
> 0)
270 for (i
= 1; i
< pool
->threads_used
; i
++)
272 struct gomp_thread
*nthr
= pool
->threads
[i
];
273 nthr
->fn
= gomp_free_pool_helper
;
276 /* This barrier undocks threads docked on pool->threads_dock. */
277 gomp_simple_barrier_wait (&pool
->threads_dock
);
278 /* And this waits till all threads have called gomp_barrier_wait_last
279 in gomp_free_pool_helper. */
280 gomp_simple_barrier_wait (&pool
->threads_dock
);
281 /* Now it is safe to destroy the barrier and free the pool. */
282 gomp_simple_barrier_destroy (&pool
->threads_dock
);
284 #ifdef HAVE_SYNC_BUILTINS
285 __sync_fetch_and_add (&gomp_managed_threads
,
286 1L - pool
->threads_used
);
288 gomp_mutex_lock (&gomp_managed_threads_lock
);
289 gomp_managed_threads
-= pool
->threads_used
- 1L;
290 gomp_mutex_unlock (&gomp_managed_threads_lock
);
294 free_team (pool
->last_team
);
296 team_free (pool
->threads
);
299 thr
->thread_pool
= NULL
;
301 if (thr
->ts
.level
== 0 && __builtin_expect (thr
->ts
.team
!= NULL
, 0))
303 if (thr
->task
!= NULL
)
305 struct gomp_task
*task
= thr
->task
;
313 #ifdef LIBGOMP_USE_PTHREADS
315 gomp_team_start (void (*fn
) (void *), void *data
, unsigned nthreads
,
316 unsigned flags
, struct gomp_team
*team
,
317 struct gomp_taskgroup
*taskgroup
)
319 struct gomp_thread_start_data
*start_data
= NULL
;
320 struct gomp_thread
*thr
, *nthr
;
321 struct gomp_task
*task
;
322 struct gomp_task_icv
*icv
;
324 struct gomp_thread_pool
*pool
;
325 unsigned i
, n
, old_threads_used
= 0;
326 pthread_attr_t thread_attr
, *attr
;
327 unsigned long nthreads_var
;
329 unsigned int s
= 0, rest
= 0, p
= 0, k
= 0;
330 unsigned int affinity_count
= 0;
331 struct gomp_thread
**affinity_thr
= NULL
;
332 bool force_display
= false;
334 thr
= gomp_thread ();
335 nested
= thr
->ts
.level
;
336 pool
= thr
->thread_pool
;
338 icv
= task
? &task
->icv
: &gomp_global_icv
;
339 if (__builtin_expect (gomp_places_list
!= NULL
, 0) && thr
->place
== 0)
341 gomp_init_affinity ();
342 if (__builtin_expect (gomp_display_affinity_var
, 0) && nthreads
== 1)
343 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
347 /* Always save the previous state, even if this isn't a nested team.
348 In particular, we should save any work share state from an outer
349 orphaned work share construct. */
350 team
->prev_ts
= thr
->ts
;
356 ++thr
->ts
.active_level
;
357 thr
->ts
.work_share
= &team
->work_shares
[0];
358 thr
->ts
.last_work_share
= NULL
;
359 #ifdef HAVE_SYNC_BUILTINS
360 thr
->ts
.single_count
= 0;
362 thr
->ts
.static_trip
= 0;
363 thr
->task
= &team
->implicit_task
[0];
364 #ifdef GOMP_NEEDS_THREAD_HANDLE
365 thr
->handle
= pthread_self ();
367 nthreads_var
= icv
->nthreads_var
;
368 if (__builtin_expect (gomp_nthreads_var_list
!= NULL
, 0)
369 && thr
->ts
.level
< gomp_nthreads_var_list_len
)
370 nthreads_var
= gomp_nthreads_var_list
[thr
->ts
.level
];
371 bind_var
= icv
->bind_var
;
372 if (bind_var
!= omp_proc_bind_false
&& (flags
& 7) != omp_proc_bind_false
)
373 bind_var
= flags
& 7;
375 if (__builtin_expect (gomp_bind_var_list
!= NULL
, 0)
376 && thr
->ts
.level
< gomp_bind_var_list_len
)
377 bind_var
= gomp_bind_var_list
[thr
->ts
.level
];
378 gomp_init_task (thr
->task
, task
, icv
);
379 thr
->task
->taskgroup
= taskgroup
;
380 team
->implicit_task
[0].icv
.nthreads_var
= nthreads_var
;
381 team
->implicit_task
[0].icv
.bind_var
= bind_var
;
388 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
390 /* Depending on chosen proc_bind model, set subpartition
391 for the master thread and initialize helper variables
392 P and optionally S, K and/or REST used by later place
393 computation for each additional thread. */
397 case omp_proc_bind_true
:
398 case omp_proc_bind_close
:
399 if (nthreads
> thr
->ts
.place_partition_len
)
401 /* T > P. S threads will be placed in each place,
402 and the final REM threads placed one by one
403 into the already occupied places. */
404 s
= nthreads
/ thr
->ts
.place_partition_len
;
405 rest
= nthreads
% thr
->ts
.place_partition_len
;
411 case omp_proc_bind_master
:
412 /* Each thread will be bound to master's place. */
414 case omp_proc_bind_spread
:
415 if (nthreads
<= thr
->ts
.place_partition_len
)
417 /* T <= P. Each subpartition will have in between s
418 and s+1 places (subpartitions starting at or
419 after rest will have s places, earlier s+1 places),
420 each thread will be bound to the first place in
421 its subpartition (except for the master thread
422 that can be bound to another place in its
424 s
= thr
->ts
.place_partition_len
/ nthreads
;
425 rest
= thr
->ts
.place_partition_len
% nthreads
;
426 rest
= (s
+ 1) * rest
+ thr
->ts
.place_partition_off
;
429 p
-= (p
- thr
->ts
.place_partition_off
) % (s
+ 1);
430 thr
->ts
.place_partition_len
= s
+ 1;
435 thr
->ts
.place_partition_len
= s
;
437 thr
->ts
.place_partition_off
= p
;
441 /* T > P. Each subpartition will have just a single
442 place and we'll place between s and s+1
443 threads into each subpartition. */
444 s
= nthreads
/ thr
->ts
.place_partition_len
;
445 rest
= nthreads
% thr
->ts
.place_partition_len
;
446 thr
->ts
.place_partition_off
= p
;
447 thr
->ts
.place_partition_len
= 1;
454 bind
= omp_proc_bind_false
;
456 /* We only allow the reuse of idle threads for non-nested PARALLEL
457 regions. This appears to be implied by the semantics of
458 threadprivate variables, but perhaps that's reading too much into
459 things. Certainly it does prevent any locking problems, since
460 only the initial program thread will modify gomp_threads. */
463 old_threads_used
= pool
->threads_used
;
465 if (nthreads
<= old_threads_used
)
467 else if (old_threads_used
== 0)
470 gomp_simple_barrier_init (&pool
->threads_dock
, nthreads
);
474 n
= old_threads_used
;
476 /* Increase the barrier threshold to make sure all new
477 threads arrive before the team is released. */
478 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
481 /* Not true yet, but soon will be. We're going to release all
482 threads from the dock, and those that aren't part of the
484 pool
->threads_used
= nthreads
;
486 /* If necessary, expand the size of the gomp_threads array. It is
487 expected that changes in the number of threads are rare, thus we
488 make no effort to expand gomp_threads_size geometrically. */
489 if (nthreads
>= pool
->threads_size
)
491 pool
->threads_size
= nthreads
+ 1;
493 = gomp_realloc (pool
->threads
,
495 * sizeof (struct gomp_thread
*));
496 /* Add current (master) thread to threads[]. */
497 pool
->threads
[0] = thr
;
500 /* Release existing idle threads. */
503 unsigned int place_partition_off
= thr
->ts
.place_partition_off
;
504 unsigned int place_partition_len
= thr
->ts
.place_partition_len
;
505 unsigned int place
= 0;
506 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
510 case omp_proc_bind_true
:
511 case omp_proc_bind_close
:
515 if (p
== (team
->prev_ts
.place_partition_off
516 + team
->prev_ts
.place_partition_len
))
517 p
= team
->prev_ts
.place_partition_off
;
519 if (i
== nthreads
- rest
)
525 case omp_proc_bind_master
:
527 case omp_proc_bind_spread
:
535 if (p
== (team
->prev_ts
.place_partition_off
536 + team
->prev_ts
.place_partition_len
))
537 p
= team
->prev_ts
.place_partition_off
;
538 place_partition_off
= p
;
540 place_partition_len
= s
+ 1;
542 place_partition_len
= s
;
550 if (p
== (team
->prev_ts
.place_partition_off
551 + team
->prev_ts
.place_partition_len
))
552 p
= team
->prev_ts
.place_partition_off
;
554 if (i
== nthreads
- rest
)
559 place_partition_off
= p
;
560 place_partition_len
= 1;
564 if (affinity_thr
!= NULL
565 || (bind
!= omp_proc_bind_true
566 && pool
->threads
[i
]->place
!= p
+ 1)
567 || pool
->threads
[i
]->place
<= place_partition_off
568 || pool
->threads
[i
]->place
> (place_partition_off
569 + place_partition_len
))
572 force_display
= true;
573 if (affinity_thr
== NULL
)
577 if (team
->prev_ts
.place_partition_len
> 64)
579 = gomp_malloc (team
->prev_ts
.place_partition_len
580 * sizeof (struct gomp_thread
*));
583 = gomp_alloca (team
->prev_ts
.place_partition_len
584 * sizeof (struct gomp_thread
*));
585 memset (affinity_thr
, '\0',
586 team
->prev_ts
.place_partition_len
587 * sizeof (struct gomp_thread
*));
588 for (j
= i
; j
< old_threads_used
; j
++)
590 if (pool
->threads
[j
]->place
591 > team
->prev_ts
.place_partition_off
592 && (pool
->threads
[j
]->place
593 <= (team
->prev_ts
.place_partition_off
594 + team
->prev_ts
.place_partition_len
)))
596 l
= pool
->threads
[j
]->place
- 1
597 - team
->prev_ts
.place_partition_off
;
598 pool
->threads
[j
]->data
= affinity_thr
[l
];
599 affinity_thr
[l
] = pool
->threads
[j
];
601 pool
->threads
[j
] = NULL
;
603 if (nthreads
> old_threads_used
)
604 memset (&pool
->threads
[old_threads_used
],
605 '\0', ((nthreads
- old_threads_used
)
606 * sizeof (struct gomp_thread
*)));
608 affinity_count
= old_threads_used
- i
;
610 if (affinity_count
== 0)
613 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
616 if (bind
!= omp_proc_bind_true
)
618 for (l
= place_partition_off
;
619 l
< place_partition_off
+ place_partition_len
;
621 if (affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
624 if (l
== place_partition_off
+ place_partition_len
)
627 nthr
= affinity_thr
[l
- team
->prev_ts
.place_partition_off
];
628 affinity_thr
[l
- team
->prev_ts
.place_partition_off
]
629 = (struct gomp_thread
*) nthr
->data
;
631 pool
->threads
[i
] = nthr
;
634 nthr
= pool
->threads
[i
];
638 nthr
= pool
->threads
[i
];
639 nthr
->ts
.team
= team
;
640 nthr
->ts
.work_share
= &team
->work_shares
[0];
641 nthr
->ts
.last_work_share
= NULL
;
642 nthr
->ts
.team_id
= i
;
643 nthr
->ts
.level
= team
->prev_ts
.level
+ 1;
644 nthr
->ts
.active_level
= thr
->ts
.active_level
;
645 nthr
->ts
.place_partition_off
= place_partition_off
;
646 nthr
->ts
.place_partition_len
= place_partition_len
;
647 nthr
->ts
.def_allocator
= thr
->ts
.def_allocator
;
648 #ifdef HAVE_SYNC_BUILTINS
649 nthr
->ts
.single_count
= 0;
651 nthr
->ts
.static_trip
= 0;
652 nthr
->num_teams
= thr
->num_teams
;
653 nthr
->team_num
= thr
->team_num
;
654 nthr
->task
= &team
->implicit_task
[i
];
656 gomp_init_task (nthr
->task
, task
, icv
);
657 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
658 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
659 nthr
->task
->taskgroup
= taskgroup
;
662 team
->ordered_release
[i
] = &nthr
->release
;
665 if (__builtin_expect (affinity_thr
!= NULL
, 0))
667 /* If AFFINITY_THR is non-NULL just because we had to
668 permute some threads in the pool, but we've managed
669 to find exactly as many old threads as we'd find
670 without affinity, we don't need to handle this
671 specially anymore. */
672 if (nthreads
<= old_threads_used
673 ? (affinity_count
== old_threads_used
- nthreads
)
674 : (i
== old_threads_used
))
676 if (team
->prev_ts
.place_partition_len
> 64)
684 /* We are going to compute the places/subpartitions
685 again from the beginning. So, we need to reinitialize
686 vars modified by the switch (bind) above inside
687 of the loop, to the state they had after the initial
691 case omp_proc_bind_true
:
692 case omp_proc_bind_close
:
693 if (nthreads
> thr
->ts
.place_partition_len
)
694 /* T > P. S has been changed, so needs
696 s
= nthreads
/ thr
->ts
.place_partition_len
;
700 case omp_proc_bind_master
:
701 /* No vars have been changed. */
703 case omp_proc_bind_spread
:
704 p
= thr
->ts
.place_partition_off
;
708 s
= nthreads
/ team
->prev_ts
.place_partition_len
;
714 /* Increase the barrier threshold to make sure all new
715 threads and all the threads we're going to let die
716 arrive before the team is released. */
718 gomp_simple_barrier_reinit (&pool
->threads_dock
,
719 nthreads
+ affinity_count
);
728 if (__builtin_expect (nthreads
+ affinity_count
> old_threads_used
, 0))
730 long diff
= (long) (nthreads
+ affinity_count
) - (long) old_threads_used
;
732 if (old_threads_used
== 0)
735 #ifdef HAVE_SYNC_BUILTINS
736 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
738 gomp_mutex_lock (&gomp_managed_threads_lock
);
739 gomp_managed_threads
+= diff
;
740 gomp_mutex_unlock (&gomp_managed_threads_lock
);
744 attr
= &gomp_thread_attr
;
745 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
748 pthread_attr_init (&thread_attr
);
749 if (! pthread_attr_getstacksize (&gomp_thread_attr
, &stacksize
))
750 pthread_attr_setstacksize (&thread_attr
, stacksize
);
754 start_data
= gomp_alloca (sizeof (struct gomp_thread_start_data
)
757 /* Launch new threads. */
758 for (; i
< nthreads
; ++i
)
762 start_data
->ts
.place_partition_off
= thr
->ts
.place_partition_off
;
763 start_data
->ts
.place_partition_len
= thr
->ts
.place_partition_len
;
764 start_data
->place
= 0;
765 if (__builtin_expect (gomp_places_list
!= NULL
, 0))
769 case omp_proc_bind_true
:
770 case omp_proc_bind_close
:
774 if (p
== (team
->prev_ts
.place_partition_off
775 + team
->prev_ts
.place_partition_len
))
776 p
= team
->prev_ts
.place_partition_off
;
778 if (i
== nthreads
- rest
)
784 case omp_proc_bind_master
:
786 case omp_proc_bind_spread
:
794 if (p
== (team
->prev_ts
.place_partition_off
795 + team
->prev_ts
.place_partition_len
))
796 p
= team
->prev_ts
.place_partition_off
;
797 start_data
->ts
.place_partition_off
= p
;
799 start_data
->ts
.place_partition_len
= s
+ 1;
801 start_data
->ts
.place_partition_len
= s
;
809 if (p
== (team
->prev_ts
.place_partition_off
810 + team
->prev_ts
.place_partition_len
))
811 p
= team
->prev_ts
.place_partition_off
;
813 if (i
== nthreads
- rest
)
818 start_data
->ts
.place_partition_off
= p
;
819 start_data
->ts
.place_partition_len
= 1;
823 start_data
->place
= p
+ 1;
824 if (affinity_thr
!= NULL
&& pool
->threads
[i
] != NULL
)
826 gomp_init_thread_affinity (attr
, p
);
830 start_data
->fn_data
= data
;
831 start_data
->ts
.team
= team
;
832 start_data
->ts
.work_share
= &team
->work_shares
[0];
833 start_data
->ts
.last_work_share
= NULL
;
834 start_data
->ts
.team_id
= i
;
835 start_data
->ts
.level
= team
->prev_ts
.level
+ 1;
836 start_data
->ts
.active_level
= thr
->ts
.active_level
;
837 start_data
->ts
.def_allocator
= thr
->ts
.def_allocator
;
838 #ifdef HAVE_SYNC_BUILTINS
839 start_data
->ts
.single_count
= 0;
841 start_data
->ts
.static_trip
= 0;
842 start_data
->num_teams
= thr
->num_teams
;
843 start_data
->team_num
= thr
->team_num
;
844 start_data
->task
= &team
->implicit_task
[i
];
845 gomp_init_task (start_data
->task
, task
, icv
);
846 team
->implicit_task
[i
].icv
.nthreads_var
= nthreads_var
;
847 team
->implicit_task
[i
].icv
.bind_var
= bind_var
;
848 start_data
->task
->taskgroup
= taskgroup
;
849 start_data
->thread_pool
= pool
;
850 start_data
->nested
= nested
;
852 attr
= gomp_adjust_thread_attr (attr
, &thread_attr
);
853 err
= pthread_create (&start_data
->handle
, attr
, gomp_thread_start
,
857 gomp_fatal ("Thread creation failed: %s", strerror (err
));
860 if (__builtin_expect (attr
== &thread_attr
, 0))
861 pthread_attr_destroy (&thread_attr
);
865 gomp_barrier_wait (&team
->barrier
);
867 gomp_simple_barrier_wait (&pool
->threads_dock
);
869 /* Decrease the barrier threshold to match the number of threads
870 that should arrive back at the end of this team. The extra
871 threads should be exiting. Note that we arrange for this test
872 to never be true for nested teams. If AFFINITY_COUNT is non-zero,
873 the barrier as well as gomp_managed_threads was temporarily
874 set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
875 AFFINITY_COUNT if non-zero will be always at least
876 OLD_THREADS_COUNT - NTHREADS. */
877 if (__builtin_expect (nthreads
< old_threads_used
, 0)
878 || __builtin_expect (affinity_count
, 0))
880 long diff
= (long) nthreads
- (long) old_threads_used
;
883 diff
= -affinity_count
;
885 gomp_simple_barrier_reinit (&pool
->threads_dock
, nthreads
);
887 #ifdef HAVE_SYNC_BUILTINS
888 __sync_fetch_and_add (&gomp_managed_threads
, diff
);
890 gomp_mutex_lock (&gomp_managed_threads_lock
);
891 gomp_managed_threads
+= diff
;
892 gomp_mutex_unlock (&gomp_managed_threads_lock
);
895 if (__builtin_expect (gomp_display_affinity_var
, 0))
898 || nthreads
!= old_threads_used
901 gomp_display_affinity_thread (gomp_thread_self (), &thr
->ts
,
905 start_data
-= nthreads
- 1;
906 for (i
= 1; i
< nthreads
; ++i
)
908 gomp_display_affinity_thread (
909 #ifdef LIBGOMP_USE_PTHREADS
921 for (i
= 1; i
< nthreads
; ++i
)
923 gomp_thread_handle handle
924 = gomp_thread_to_pthread_t (pool
->threads
[i
]);
925 gomp_display_affinity_thread (handle
, &pool
->threads
[i
]->ts
,
926 pool
->threads
[i
]->place
);
931 if (__builtin_expect (affinity_thr
!= NULL
, 0)
932 && team
->prev_ts
.place_partition_len
> 64)
938 /* Terminate the current team. This is only to be called by the master
939 thread. We assume that we must wait for the other threads. */
944 struct gomp_thread
*thr
= gomp_thread ();
945 struct gomp_team
*team
= thr
->ts
.team
;
947 /* This barrier handles all pending explicit threads.
948 As #pragma omp cancel parallel might get awaited count in
949 team->barrier in a inconsistent state, we need to use a different
951 gomp_team_barrier_wait_final (&team
->barrier
);
952 if (__builtin_expect (team
->team_cancelled
, 0))
954 struct gomp_work_share
*ws
= team
->work_shares_to_free
;
957 struct gomp_work_share
*next_ws
= gomp_ptrlock_get (&ws
->next_ws
);
959 gomp_ptrlock_set (&ws
->next_ws
, ws
);
960 gomp_fini_work_share (ws
);
966 gomp_fini_work_share (thr
->ts
.work_share
);
969 thr
->ts
= team
->prev_ts
;
971 if (__builtin_expect (thr
->ts
.level
!= 0, 0))
973 #ifdef HAVE_SYNC_BUILTINS
974 __sync_fetch_and_add (&gomp_managed_threads
, 1L - team
->nthreads
);
976 gomp_mutex_lock (&gomp_managed_threads_lock
);
977 gomp_managed_threads
-= team
->nthreads
- 1L;
978 gomp_mutex_unlock (&gomp_managed_threads_lock
);
980 /* This barrier has gomp_barrier_wait_last counterparts
981 and ensures the team can be safely destroyed. */
982 gomp_barrier_wait (&team
->barrier
);
985 if (__builtin_expect (team
->work_shares
[0].next_alloc
!= NULL
, 0))
987 struct gomp_work_share
*ws
= team
->work_shares
[0].next_alloc
;
990 struct gomp_work_share
*next_ws
= ws
->next_alloc
;
996 gomp_sem_destroy (&team
->master_release
);
998 if (__builtin_expect (thr
->ts
.team
!= NULL
, 0)
999 || __builtin_expect (team
->nthreads
== 1, 0))
1003 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
1004 if (pool
->last_team
)
1005 free_team (pool
->last_team
);
1006 pool
->last_team
= team
;
1007 gomp_release_thread_pool (pool
);
1011 #ifdef LIBGOMP_USE_PTHREADS
1013 /* Constructors for this file. */
1015 static void __attribute__((constructor
))
1016 initialize_team (void)
1018 #if !defined HAVE_TLS && !defined USE_EMUTLS
1019 static struct gomp_thread initial_thread_tls_data
;
1021 pthread_key_create (&gomp_tls_key
, NULL
);
1022 pthread_setspecific (gomp_tls_key
, &initial_thread_tls_data
);
1025 if (pthread_key_create (&gomp_thread_destructor
, gomp_free_thread
) != 0)
1026 gomp_fatal ("could not create thread pool destructor.");
1029 static void __attribute__((destructor
))
1030 team_destructor (void)
1032 /* Without this dlclose on libgomp could lead to subsequent
1034 pthread_key_delete (gomp_thread_destructor
);
1037 /* Similar to gomp_free_pool_helper, but don't detach itself,
1038 gomp_pause_host will pthread_join those threads. */
1041 gomp_pause_pool_helper (void *thread_pool
)
1043 struct gomp_thread
*thr
= gomp_thread ();
1044 struct gomp_thread_pool
*pool
1045 = (struct gomp_thread_pool
*) thread_pool
;
1046 gomp_simple_barrier_wait_last (&pool
->threads_dock
);
1047 gomp_sem_destroy (&thr
->release
);
1048 thr
->thread_pool
= NULL
;
1050 pthread_exit (NULL
);
1053 /* Free a thread pool and release its threads. Return non-zero on
1057 gomp_pause_host (void)
1059 struct gomp_thread
*thr
= gomp_thread ();
1060 struct gomp_thread_pool
*pool
= thr
->thread_pool
;
1065 if (pool
->threads_used
> 0)
1069 = gomp_alloca (sizeof (pthread_t
) * pool
->threads_used
);
1070 for (i
= 1; i
< pool
->threads_used
; i
++)
1072 struct gomp_thread
*nthr
= pool
->threads
[i
];
1073 nthr
->fn
= gomp_pause_pool_helper
;
1075 thrs
[i
] = gomp_thread_to_pthread_t (nthr
);
1077 /* This barrier undocks threads docked on pool->threads_dock. */
1078 gomp_simple_barrier_wait (&pool
->threads_dock
);
1079 /* And this waits till all threads have called gomp_barrier_wait_last
1080 in gomp_pause_pool_helper. */
1081 gomp_simple_barrier_wait (&pool
->threads_dock
);
1082 /* Now it is safe to destroy the barrier and free the pool. */
1083 gomp_simple_barrier_destroy (&pool
->threads_dock
);
1085 #ifdef HAVE_SYNC_BUILTINS
1086 __sync_fetch_and_add (&gomp_managed_threads
,
1087 1L - pool
->threads_used
);
1089 gomp_mutex_lock (&gomp_managed_threads_lock
);
1090 gomp_managed_threads
-= pool
->threads_used
- 1L;
1091 gomp_mutex_unlock (&gomp_managed_threads_lock
);
1093 for (i
= 1; i
< pool
->threads_used
; i
++)
1094 pthread_join (thrs
[i
], NULL
);
1096 if (pool
->last_team
)
1097 free_team (pool
->last_team
);
1099 team_free (pool
->threads
);
1102 thr
->thread_pool
= NULL
;
1108 struct gomp_task_icv
*
1111 struct gomp_thread
*thr
= gomp_thread ();
1112 struct gomp_task
*task
= gomp_malloc (sizeof (struct gomp_task
));
1113 gomp_init_task (task
, NULL
, &gomp_global_icv
);
1115 #ifdef LIBGOMP_USE_PTHREADS
1116 pthread_setspecific (gomp_thread_destructor
, thr
);